//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
//   (((X ^ XORValue) + AddValue) >> Bit)
struct IPMConversion {
  IPMConversion(unsigned xorValue, int64_t addValue, unsigned bit)
    : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  // Value XORed into the raw IPM result before the add.
  int64_t XORValue;
  // Value added after the XOR, before the final shift.
  int64_t AddValue;
  // Bit position that holds the final 0/1 result after the shift.
  unsigned Bit;
};

// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In)
    : Op0(Op0In), Op1(Op1In), Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value.  Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.  Asserts on any other type, so
// callers must only pass i32 or i64.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.  Clearing the kill flag is what makes earlier uses safe.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm,
                                             const SystemZSubtarget &STI)
    : TargetLowering(tm), Subtarget(STI) {
  MVT PtrVT = getPointerTy();

  // Set up the register classes.  GRX32 (which includes the high-word
  // registers) is only usable when the subtarget has high-word support.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setExceptionPointerRegister(SystemZ::R6D);
  setExceptionSelectorRegister(SystemZ::R7D);
  setStackPointerRegisterToSaveRestore(SystemZ::R15D);

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(2);

  // Handle operations that are handled in a similar way for all types.
  // This loop deliberately spans both the integer and FP value types,
  // relying on FIRST_INTEGER_VALUETYPE..LAST_FP_VALUETYPE being contiguous.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    // i32 is widened to a signed i64 conversion; i64 has to be expanded.
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);

  // Use custom expanders so that we can force the function to use
  // a frame pointer.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
    }
  }

  // We have fused multiply-addition for f32 and f64 but not f128.
  setOperationAction(ISD::FMA, MVT::f32, Legal);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FMA, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
  setOperationAction(ISD::BITCAST, MVT::f32, Custom);

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine(ISD::SIGN_EXTEND);

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable.  E.g.  "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC".  Handle the choice in target-specific code instead.
  MaxStoresPerMemset = 0;
  MaxStoresPerMemsetOptSize = 0;
}

// Use i32 for scalar SETCC results; vector results keep the vector shape
// with integer elements.
EVT SystemZTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

// FMA is profitable for f32 and f64 (MAEBR/MADBR family) but not f128.
bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return false;
  default:
    break;
  }

  return false;
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  return Imm.isZero() || Imm.isNegZero();
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.  An addition of Imm is legal if either
  // Imm itself or its negation fits in an unsigned 32-bit immediate.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                           unsigned,
                                                           unsigned,
                                                           bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

bool SystemZTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                  Type *Ty) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  // Indexing is OK but no scale factor can be applied.
  return AM.Scale == 0 || AM.Scale == 1;
}

// Truncation between integer IR types is free: it just uses the low part
// of the wider register.
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits();
  unsigned ToBits = ToType->getPrimitiveSizeInBits();
  return FromBits > ToBits;
}

// Same as above, phrased in terms of EVTs for the SelectionDAG.
bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getSizeInBits();
  unsigned ToBits = ToVT.getSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
case 'r': // General-purpose register 420 return C_RegisterClass; 421 422 case 'Q': // Memory with base and unsigned 12-bit displacement 423 case 'R': // Likewise, plus an index 424 case 'S': // Memory with base and signed 20-bit displacement 425 case 'T': // Likewise, plus an index 426 case 'm': // Equivalent to 'T'. 427 return C_Memory; 428 429 case 'I': // Unsigned 8-bit constant 430 case 'J': // Unsigned 12-bit constant 431 case 'K': // Signed 16-bit constant 432 case 'L': // Signed 20-bit displacement (on all targets we support) 433 case 'M': // 0x7fffffff 434 return C_Other; 435 436 default: 437 break; 438 } 439 } 440 return TargetLowering::getConstraintType(Constraint); 441 } 442 443 TargetLowering::ConstraintWeight SystemZTargetLowering:: 444 getSingleConstraintMatchWeight(AsmOperandInfo &info, 445 const char *constraint) const { 446 ConstraintWeight weight = CW_Invalid; 447 Value *CallOperandVal = info.CallOperandVal; 448 // If we don't have a value, we can't do a match, 449 // but allow it at the lowest weight. 450 if (!CallOperandVal) 451 return CW_Default; 452 Type *type = CallOperandVal->getType(); 453 // Look at the constraint type. 
454 switch (*constraint) { 455 default: 456 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 457 break; 458 459 case 'a': // Address register 460 case 'd': // Data register (equivalent to 'r') 461 case 'h': // High-part register 462 case 'r': // General-purpose register 463 if (CallOperandVal->getType()->isIntegerTy()) 464 weight = CW_Register; 465 break; 466 467 case 'f': // Floating-point register 468 if (type->isFloatingPointTy()) 469 weight = CW_Register; 470 break; 471 472 case 'I': // Unsigned 8-bit constant 473 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 474 if (isUInt<8>(C->getZExtValue())) 475 weight = CW_Constant; 476 break; 477 478 case 'J': // Unsigned 12-bit constant 479 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 480 if (isUInt<12>(C->getZExtValue())) 481 weight = CW_Constant; 482 break; 483 484 case 'K': // Signed 16-bit constant 485 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 486 if (isInt<16>(C->getSExtValue())) 487 weight = CW_Constant; 488 break; 489 490 case 'L': // Signed 20-bit displacement (on all targets we support) 491 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 492 if (isInt<20>(C->getSExtValue())) 493 weight = CW_Constant; 494 break; 495 496 case 'M': // 0x7fffffff 497 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 498 if (C->getZExtValue() == 0x7fffffff) 499 weight = CW_Constant; 500 break; 501 } 502 return weight; 503 } 504 505 // Parse a "{tNNN}" register constraint for which the register type "t" 506 // has already been verified. MC is the class associated with "t" and 507 // Map maps 0-based register numbers to LLVM register numbers. 
// Returns {LLVM register number, register class} on success, or
// {0, nullptr} when the number is out of range or unmapped.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(const std::string &Constraint,
                    const TargetRegisterClass *RC, const unsigned *Map) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  // Constraint[0] is '{' and Constraint[1] the type letter, so the
  // register number (if any) starts at index 2.
  if (isdigit(Constraint[2])) {
    std::string Suffix(Constraint.data() + 2, Constraint.size() - 2);
    unsigned Index = atoi(Suffix.c_str());
    // Only 16 registers of each kind; a zero map entry means "no mapping".
    if (Index < 16 && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, const std::string &Constraint,
    MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (VT == MVT::f64)
        return std::make_pair(0U, &SystemZ::FP64BitRegClass);
      else if (VT == MVT::f128)
        return std::make_pair(0U, &SystemZ::FP128BitRegClass);
      return std::make_pair(0U, &SystemZ::FP32BitRegClass);
    }
  }
  if (Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT.  The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs);
    }
    if (Constraint[1] == 'f') {
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
589 if (Constraint.length() == 1) { 590 switch (Constraint[0]) { 591 case 'I': // Unsigned 8-bit constant 592 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 593 if (isUInt<8>(C->getZExtValue())) 594 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), 595 Op.getValueType())); 596 return; 597 598 case 'J': // Unsigned 12-bit constant 599 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 600 if (isUInt<12>(C->getZExtValue())) 601 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), 602 Op.getValueType())); 603 return; 604 605 case 'K': // Signed 16-bit constant 606 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 607 if (isInt<16>(C->getSExtValue())) 608 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), 609 Op.getValueType())); 610 return; 611 612 case 'L': // Signed 20-bit displacement (on all targets we support) 613 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 614 if (isInt<20>(C->getSExtValue())) 615 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), 616 Op.getValueType())); 617 return; 618 619 case 'M': // 0x7fffffff 620 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 621 if (C->getZExtValue() == 0x7fffffff) 622 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), 623 Op.getValueType())); 624 return; 625 } 626 } 627 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 628 } 629 630 //===----------------------------------------------------------------------===// 631 // Calling conventions 632 //===----------------------------------------------------------------------===// 633 634 #include "SystemZGenCallingConv.inc" 635 636 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 637 Type *ToType) const { 638 return isTruncateFree(FromType, ToType); 639 } 640 641 bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 642 if (!CI->isTailCall()) 643 return false; 644 return true; 645 } 646 647 // Value is a value that has been passed to us in the location described by VA 648 // (and so has type VA.getLocVT()). 
Convert Value to VA.getValVT(), chaining 649 // any loads onto Chain. 650 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDLoc DL, 651 CCValAssign &VA, SDValue Chain, 652 SDValue Value) { 653 // If the argument has been promoted from a smaller type, insert an 654 // assertion to capture this. 655 if (VA.getLocInfo() == CCValAssign::SExt) 656 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 657 DAG.getValueType(VA.getValVT())); 658 else if (VA.getLocInfo() == CCValAssign::ZExt) 659 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 660 DAG.getValueType(VA.getValVT())); 661 662 if (VA.isExtInLoc()) 663 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 664 else if (VA.getLocInfo() == CCValAssign::Indirect) 665 Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, 666 MachinePointerInfo(), false, false, false, 0); 667 else 668 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); 669 return Value; 670 } 671 672 // Value is a value of type VA.getValVT() that we need to copy into 673 // the location described by VA. Return a copy of Value converted to 674 // VA.getValVT(). The caller is responsible for handling indirect values. 
// Apply the extension (or no-op) that VA's location info requires.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDLoc DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::Full:
    return Value;
  default:
    // Indirect values are handled by the caller, not here.
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

SDValue SystemZTargetLowering::
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     SDLoc DL, SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL =
    static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  // Counts of argument registers used by fixed (non-vararg) arguments;
  // recorded below for use by va_start lowering.
  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      }

      unsigned VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
                                      VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.  Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      EVT PtrVT = getPointerTy();
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(FI),
                             false, false, false, 0);
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  if (IsVarArg) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
    unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::NumArgFPRs) {
      SDValue MemOps[SystemZ::NumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
        int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
        unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(FI),
                                 false, false, 0);
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::NumArgFPRs-NumFixedFPRs));
    }
  }

  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the call-saved argument register R6.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    unsigned Reg = VA.getLocReg();
    // R6 in any of its aliases (high word, low word, 64-bit).
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy();

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  // Mark the start of the call.  Sibling calls reuse the caller's frame,
  // so they need no CALLSEQ markers.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, PtrVT, true),
                                 DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                                         MachinePointerInfo::getFixedStack(FI),
                                         false, false, 0));
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc())
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
      unsigned Offset = SystemZMC::CallFrameSize + VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset));

      // Emit the store.
      MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, Address,
                                         MachinePointerInfo(),
                                         false, false, 0));
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
951 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 952 if (IsTailCall) 953 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 954 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 955 Glue = Chain.getValue(1); 956 957 // Mark the end of the call, which is glued to the call itself. 958 Chain = DAG.getCALLSEQ_END(Chain, 959 DAG.getConstant(NumBytes, PtrVT, true), 960 DAG.getConstant(0, PtrVT, true), 961 Glue, DL); 962 Glue = Chain.getValue(1); 963 964 // Assign locations to each value returned by this call. 965 SmallVector<CCValAssign, 16> RetLocs; 966 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 967 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 968 969 // Copy all of the result registers out of their specified physreg. 970 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 971 CCValAssign &VA = RetLocs[I]; 972 973 // Copy the value out, gluing the copy to the end of the call sequence. 974 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 975 VA.getLocVT(), Glue); 976 Chain = RetValue.getValue(1); 977 Glue = RetValue.getValue(2); 978 979 // Convert the value of the return register into the value that's 980 // being returned. 981 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 982 } 983 984 return Chain; 985 } 986 987 SDValue 988 SystemZTargetLowering::LowerReturn(SDValue Chain, 989 CallingConv::ID CallConv, bool IsVarArg, 990 const SmallVectorImpl<ISD::OutputArg> &Outs, 991 const SmallVectorImpl<SDValue> &OutVals, 992 SDLoc DL, SelectionDAG &DAG) const { 993 MachineFunction &MF = DAG.getMachineFunction(); 994 995 // Assign locations to each returned value. 
996 SmallVector<CCValAssign, 16> RetLocs; 997 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 998 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 999 1000 // Quick exit for void returns 1001 if (RetLocs.empty()) 1002 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain); 1003 1004 // Copy the result values into the output registers. 1005 SDValue Glue; 1006 SmallVector<SDValue, 4> RetOps; 1007 RetOps.push_back(Chain); 1008 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 1009 CCValAssign &VA = RetLocs[I]; 1010 SDValue RetValue = OutVals[I]; 1011 1012 // Make the return register live on exit. 1013 assert(VA.isRegLoc() && "Can only return in registers!"); 1014 1015 // Promote the value as required. 1016 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 1017 1018 // Chain and glue the copies together. 1019 unsigned Reg = VA.getLocReg(); 1020 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 1021 Glue = Chain.getValue(1); 1022 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 1023 } 1024 1025 // Update chain and glue. 1026 RetOps[0] = Chain; 1027 if (Glue.getNode()) 1028 RetOps.push_back(Glue); 1029 1030 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps); 1031 } 1032 1033 SDValue SystemZTargetLowering:: 1034 prepareVolatileOrAtomicLoad(SDValue Chain, SDLoc DL, SelectionDAG &DAG) const { 1035 return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); 1036 } 1037 1038 // Return true if Op is an intrinsic node with chain that returns the CC value 1039 // as its only (other) argument. Provide the associated SystemZISD opcode and 1040 // the mask of valid CC values if so. 
1041 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 1042 unsigned &CCValid) { 1043 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1044 switch (Id) { 1045 case Intrinsic::s390_tbegin: 1046 Opcode = SystemZISD::TBEGIN; 1047 CCValid = SystemZ::CCMASK_TBEGIN; 1048 return true; 1049 1050 case Intrinsic::s390_tbegin_nofloat: 1051 Opcode = SystemZISD::TBEGIN_NOFLOAT; 1052 CCValid = SystemZ::CCMASK_TBEGIN; 1053 return true; 1054 1055 case Intrinsic::s390_tend: 1056 Opcode = SystemZISD::TEND; 1057 CCValid = SystemZ::CCMASK_TEND; 1058 return true; 1059 1060 default: 1061 return false; 1062 } 1063 } 1064 1065 // Emit an intrinsic with chain with a glued value instead of its CC result. 1066 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, 1067 unsigned Opcode) { 1068 // Copy all operands except the intrinsic ID. 1069 unsigned NumOps = Op.getNumOperands(); 1070 SmallVector<SDValue, 6> Ops; 1071 Ops.reserve(NumOps - 1); 1072 Ops.push_back(Op.getOperand(0)); 1073 for (unsigned I = 2; I < NumOps; ++I) 1074 Ops.push_back(Op.getOperand(I)); 1075 1076 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 1077 SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); 1078 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 1079 SDValue OldChain = SDValue(Op.getNode(), 1); 1080 SDValue NewChain = SDValue(Intr.getNode(), 0); 1081 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 1082 return Intr; 1083 } 1084 1085 // CC is a comparison that will be implemented using an integer or 1086 // floating-point comparison. Return the condition code mask for 1087 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 1088 // unsigned comparisons and clear for signed ones. In the floating-point 1089 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 
static unsigned CCMaskForCondCode(ISD::CondCode CC) {
  // CONV expands one base relation (EQ, NE, ...) into its plain, ordered
  // (SETOx) and unsigned/unordered (SETUx) forms; only the SETUx form
  // carries the CCMASK_CMP_UO bit.
#define CONV(X) \
  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X

  switch (CC) {
  default:
    llvm_unreachable("Invalid integer condition!");

  CONV(EQ);
  CONV(NE);
  CONV(GT);
  CONV(GE);
  CONV(LT);
  CONV(LE);

  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
  }
#undef CONV
}

// Return a sequence for getting a 1 from an IPM result when CC has a
// value in CCMask and a 0 when CC has a value in CCValid & ~CCMask.
// The handling of CC values outside CCValid doesn't matter.
static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) {
  // Deal with cases where the result can be taken directly from a bit
  // of the IPM result.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC);
  if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3)))
    return IPMConversion(0, 0, SystemZ::IPM_CC + 1);

  // Deal with cases where we can add a value to force the sign bit
  // to contain the right value.  Putting the bit in 31 means we can
  // use SRL rather than RISBG(L), and also makes it easier to get a
  // 0/-1 value, so it has priority over the other tests below.
  //
  // These sequences rely on the fact that the upper two bits of the
  // IPM result are zero.
  uint64_t TopBit = uint64_t(1) << 31;
  if (CCMask == (CCValid & SystemZ::CCMASK_0))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1)))
    return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2)))
    return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_3))
    return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_1
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31);

  // Next try inverting the value and testing a bit.  0/1 could be
  // handled this way too, but we dealt with that case above.
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2)))
    return IPMConversion(-1, 0, SystemZ::IPM_CC);

  // Handle cases where adding a value forces a non-sign bit to contain
  // the right value.
  if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2)))
    return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3)))
    return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1);

  // The remaining cases are 1, 2, 0/1/3 and 0/2/3.  All these are
  // can be done by inverting the low CC bit and applying one of the
  // sign-based extractions above.
  if (CCMask == (CCValid & SystemZ::CCMASK_1))
    return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & SystemZ::CCMASK_2))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_1
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31);
  if (CCMask == (CCValid & (SystemZ::CCMASK_0
                            | SystemZ::CCMASK_2
                            | SystemZ::CCMASK_3)))
    return IPMConversion(1 << SystemZ::IPM_CC,
                         TopBit - (1 << SystemZ::IPM_CC), 31);

  llvm_unreachable("Unexpected CC combination");
}

// If C can be converted to a comparison against zero, adjust the operands
// as necessary.  E.g. "x > -1" becomes "x >= 0" and "x < 1" becomes
// "x <= 0", both of which compare against zero.
static void adjustZeroCmp(SelectionDAG &DAG, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    // Toggling the EQ bit turns GT into GE, LE into LT, etc., which is
    // exactly the relation against 0 instead of against +/-1.
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, C.Op1.getValueType());
  }
}

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getStoreSizeInBits();
  if (NumBits != 8 && NumBits != 16)
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    assert(C.ICmpType == SystemZICMP::Any &&
           "Signedness shouldn't matter here.");
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
                              ISD::SEXTLOAD :
                              ISD::ZEXTLOAD);
  if (C.Op0.getValueType() != MVT::i32 ||
      Load->getExtensionType() != ExtType)
    C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32,
                           Load->getChain(), Load->getBasePtr(),
                           Load->getPointerInfo(), Load->getMemoryVT(),
                           Load->isVolatile(), Load->isNonTemporal(),
                           Load->isInvariant(), Load->getAlignment());

  // Make sure that the second operand is an i32 with the right value.
  if (C.Op1.getValueType() != MVT::i32 ||
      Value != ConstOp1->getZExtValue())
    C.Op1 = DAG.getConstant(Value, MVT::i32);
}

// Return true if Op is either an unextended load, or a load suitable
// for integer register-memory comparisons of type ICmpType.
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
  if (Load) {
    // There are no instructions to compare a register with a memory byte.
    if (Load->getMemoryVT() == MVT::i8)
      return false;
    // Otherwise decide on extension type.
    switch (Load->getExtensionType()) {
    case ISD::NON_EXTLOAD:
      return true;
    case ISD::SEXTLOAD:
      return ICmpType != SystemZICMP::UnsignedOnly;
    case ISD::ZEXTLOAD:
      return ICmpType != SystemZICMP::SignedOnly;
    default:
      break;
    }
  }
  return false;
}

// Return true if it is better to swap the operands of C.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(ConstOp1->getSExtValue()))
      return false;
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly &&
      Opcode0 == ISD::AND &&
      C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
    return true;

  return false;
}

// Return a version of comparison CC mask CCMask in which the LT and GT
// actions are swapped.  EQ and UO bits pass through unchanged.
static unsigned reverseCCMask(unsigned CCMask) {
  return ((CCMask & SystemZ::CCMASK_CMP_EQ) |
          (CCMask & SystemZ::CCMASK_CMP_GT ? SystemZ::CCMASK_CMP_LT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_LT ? SystemZ::CCMASK_CMP_GT : 0) |
          (CCMask & SystemZ::CCMASK_CMP_UO));
}

// Check whether C tests for equality between X and Y and whether X - Y
// or Y - X is also computed.  In that case it's better to compare the
// result of the subtraction against zero.
static void adjustForSubtraction(SelectionDAG &DAG, Comparison &C) {
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::SUB &&
          ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
           (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
        C.Op0 = SDValue(N, 0);
        C.Op1 = DAG.getConstant(0, N->getValueType(0));
        return;
      }
    }
  }
}

// Check whether C compares a floating-point value with zero and if that
// floating-point value is also negated.  In this case we can use the
// negation to set CC, so avoiding separate LOAD AND TEST and
// LOAD (NEGATIVE/COMPLEMENT) instructions.
static void adjustForFNeg(Comparison &C) {
  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
  if (C1 && C1->isZero()) {
    for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
      SDNode *N = *I;
      if (N->getOpcode() == ISD::FNEG) {
        // Comparing -X against 0 reverses the LT/GT sense of the mask.
        C.Op0 = SDValue(N, 0);
        C.CCMask = reverseCCMask(C.CCMask);
        return;
      }
    }
  }
}

// Check whether C compares (shl X, 32) with 0 and whether X is
// also sign-extended.  In that case it is better to test the result
// of the sign extension using LTGFR.
//
// This case is important because InstCombine transforms a comparison
// with (sext (trunc X)) into a comparison with (shl X, 32).
1403 static void adjustForLTGFR(Comparison &C) { 1404 // Check for a comparison between (shl X, 32) and 0. 1405 if (C.Op0.getOpcode() == ISD::SHL && 1406 C.Op0.getValueType() == MVT::i64 && 1407 C.Op1.getOpcode() == ISD::Constant && 1408 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1409 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 1410 if (C1 && C1->getZExtValue() == 32) { 1411 SDValue ShlOp0 = C.Op0.getOperand(0); 1412 // See whether X has any SIGN_EXTEND_INREG uses. 1413 for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) { 1414 SDNode *N = *I; 1415 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 1416 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 1417 C.Op0 = SDValue(N, 0); 1418 return; 1419 } 1420 } 1421 } 1422 } 1423 } 1424 1425 // If C compares the truncation of an extending load, try to compare 1426 // the untruncated value instead. This exposes more opportunities to 1427 // reuse CC. 1428 static void adjustICmpTruncate(SelectionDAG &DAG, Comparison &C) { 1429 if (C.Op0.getOpcode() == ISD::TRUNCATE && 1430 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 1431 C.Op1.getOpcode() == ISD::Constant && 1432 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) { 1433 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 1434 if (L->getMemoryVT().getStoreSizeInBits() 1435 <= C.Op0.getValueType().getSizeInBits()) { 1436 unsigned Type = L->getExtensionType(); 1437 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 1438 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 1439 C.Op0 = C.Op0.getOperand(0); 1440 C.Op1 = DAG.getConstant(0, C.Op0.getValueType()); 1441 } 1442 } 1443 } 1444 } 1445 1446 // Return true if shift operation N has an in-range constant shift value. 1447 // Store it in ShiftVal if so. 
static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!Shift)
    return false;

  // Reject shift amounts that are as large as the value width; such
  // shifts would not leave a well-defined in-range amount.
  uint64_t Amount = Shift->getZExtValue();
  if (Amount >= N.getValueType().getSizeInBits())
    return false;

  ShiftVal = Amount;
  return true;
}

// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now");

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
      !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  unsigned HighShift = 63 - countLeadingZeros(Mask);
  uint64_t High = uint64_t(1) << HighShift;
  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is dropped.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal <= Low) {
    // (AND result) < CmpVal with CmpVal <= lowest mask bit means all
    // masked bits are 0; >= means at least one is 1.
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}

// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible.  We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero than the low N bits of Op0 can
    // be masked off without changing the result.
    // (-(CmpVal & -CmpVal) clears everything below CmpVal's lowest set bit.)
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.  Shifts of the AND input can be folded by shifting
  // the mask and comparison value the opposite way.
  unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.  Reuse the existing constant node for
  // the mask when its value is unchanged, to avoid creating a duplicate.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}

// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond) {
  // A comparison of an intrinsic's CC result against a constant can be
  // folded into the intrinsic itself, provided this is its only CC use.
  if (CmpOp1.getOpcode() == ISD::Constant) {
    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
  }
  Comparison C(CmpOp0, CmpOp1);
  C.CCMask = CCMaskForCondCode(Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    C.CCValid = SystemZ::CCMASK_FCMP;
    C.Opcode = SystemZISD::FCMP;
    adjustForFNeg(C);
  } else {
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison.  Equality and inequality tests can
    // use either signed or unsigned comparisons.  The choice also doesn't
    // matter if both sign bits are known to be clear.  In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    adjustZeroCmp(DAG, C);
    adjustSubwordCmp(DAG, C);
    adjustForSubtraction(DAG, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, C);
  }

  if (shouldSwapCmpOperands(C)) {
    std::swap(C.Op0, C.Op1);
    C.CCMask = reverseCCMask(C.CCMask);
  }

  adjustForTestUnderMask(DAG, C);
  return C;
}

// Emit the comparison instruction described by C.
// Emit the comparison instruction described by C and return the glue
// value carrying the resulting CC.
static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
  if (!C.Op1.getNode()) {
    // There is no second operand: the CC comes directly from an intrinsic
    // with CC and chain (see getIntrinsicCmp/getCmp).
    SDValue Op;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
      break;
    default:
      llvm_unreachable("Invalid comparison operands");
    }
    // The glue result is the node's last value.
    return SDValue(Op.getNode(), Op->getNumValues() - 1);
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
                       DAG.getConstant(C.ICmpType, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    // Tell isel whether the register form of TEST UNDER MASK is required:
    // that is the case when exactly one of the "mixed MSB" CC values is
    // being tested for.
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::Glue, C.Op0, C.Op1,
                       DAG.getConstant(RegisterOnly, MVT::i32));
  }
  return DAG.getNode(C.Opcode, DL, MVT::Glue, C.Op0, C.Op1);
}

// Implement a 32-bit *MUL_LOHI operation by extending both operands to
// 64 bits.  Extend is the extension type to use.  Store the high part
// in Hi and the low part in Lo.
static void lowerMUL_LOHI32(SelectionDAG &DAG, SDLoc DL,
                            unsigned Extend, SDValue Op0, SDValue Op1,
                            SDValue &Hi, SDValue &Lo) {
  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
  // High half is the top 32 bits of the 64-bit product.
  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, DAG.getConstant(32, MVT::i64));
  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
}

// Lower a binary operation that produces two VT results, one in each
// half of a GR128 pair.  Op0 and Op1 are the VT operands to the operation,
// Extend extends Op0 to a GR128, and Opcode performs the GR128 operation
// on the extended Op0 and (unextended) Op1.  Store the even register result
// in Even and the odd register result in Odd.
static void lowerGR128Binary(SelectionDAG &DAG, SDLoc DL, EVT VT,
                             unsigned Extend, unsigned Opcode,
                             SDValue Op0, SDValue Op1,
                             SDValue &Even, SDValue &Odd) {
  SDNode *In128 = DAG.getMachineNode(Extend, DL, MVT::Untyped, Op0);
  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped,
                               SDValue(In128, 0), Op1);
  bool Is32Bit = is32Bit(VT);
  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
}

// Return an i32 value that is 1 if the CC value produced by Glue is
// in the mask CCMask and 0 otherwise.  CC is known to have a value
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, SDLoc DL, SDValue Glue,
                         unsigned CCValid, unsigned CCMask) {
  // Compute the (((X ^ XORValue) + AddValue) >> Bit) sequence needed to
  // extract the boolean from the IPM result.
  IPMConversion Conversion = getIPMConversion(CCValid, CCMask);
  SDValue Result = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);

  if (Conversion.XORValue)
    Result = DAG.getNode(ISD::XOR, DL, MVT::i32, Result,
                         DAG.getConstant(Conversion.XORValue, MVT::i32));

  if (Conversion.AddValue)
    Result = DAG.getNode(ISD::ADD, DL, MVT::i32, Result,
                         DAG.getConstant(Conversion.AddValue, MVT::i32));

  // The SHR/AND sequence should get optimized to an RISBG.
  Result = DAG.getNode(ISD::SRL, DL, MVT::i32, Result,
                       DAG.getConstant(Conversion.Bit, MVT::i32));
  if (Conversion.Bit != 31)
    // A shift right by 31 already isolates a single bit; otherwise mask
    // the result down to bit 0.
    Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
                         DAG.getConstant(1, MVT::i32));
  return Result;
}

// Lower an ISD::SETCC node into a comparison followed by an IPM-based
// extraction of the requested CC test.
SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
  SDValue Glue = emitCmp(DAG, DL, C);
  return emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
}

// Lower an ISD::BR_CC node into a comparison followed by a BR_CCMASK
// conditional branch.
SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue CmpOp0 = Op.getOperand(2);
  SDValue CmpOp1 = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
  SDValue Glue = emitCmp(DAG, DL, C);
  return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
                     Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
                     DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
}

// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
// allowing Pos and Neg to be wider than CmpOp.
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
  return (Neg.getOpcode() == ISD::SUB &&
          Neg.getOperand(0).getOpcode() == ISD::Constant &&
          cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
          Neg.getOperand(1) == Pos &&
          // Pos may either be CmpOp itself or a sign extension of it.
          (Pos == CmpOp ||
           (Pos.getOpcode() == ISD::SIGN_EXTEND &&
            Pos.getOperand(0) == CmpOp)));
}

// Return the absolute or negative absolute of Op; IsNegative decides which.
static SDValue getAbsolute(SelectionDAG &DAG, SDLoc DL, SDValue Op,
                           bool IsNegative) {
  Op = DAG.getNode(SystemZISD::IABS, DL, Op.getValueType(), Op);
  if (IsNegative)
    // Negate the absolute value to produce -|Op|.
    Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
                     DAG.getConstant(0, Op.getValueType()), Op);
  return Op;
}

// Lower an ISD::SELECT_CC node, recognizing absolute-value and -1/0
// selection idioms before falling back to a generic SELECT_CCMASK.
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDValue CmpOp0 = Op.getOperand(0);
  SDValue CmpOp1 = Op.getOperand(1);
  SDValue TrueOp = Op.getOperand(2);
  SDValue FalseOp = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  if (C.Opcode == SystemZISD::ICMP &&
      C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
    // Selecting the negated value means this is a negative-absolute.
    if (isAbsolute(C.Op0, TrueOp, FalseOp))
      return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(C.Op0, FalseOp, TrueOp))
      return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  SDValue Glue = emitCmp(DAG, DL, C);

  // Special case for handling -1/0 results.  The shifts we use here
  // should get optimized with the IPM conversion sequence.
  auto *TrueC = dyn_cast<ConstantSDNode>(TrueOp);
  auto *FalseC = dyn_cast<ConstantSDNode>(FalseOp);
  if (TrueC && FalseC) {
    int64_t TrueVal = TrueC->getSExtValue();
    int64_t FalseVal = FalseC->getSExtValue();
    if ((TrueVal == -1 && FalseVal == 0) || (TrueVal == 0 && FalseVal == -1)) {
      // Invert the condition if we want -1 on false.
      if (TrueVal == 0)
        C.CCMask ^= C.CCValid;
      SDValue Result = emitSETCC(DAG, DL, Glue, C.CCValid, C.CCMask);
      EVT VT = Op.getValueType();
      // Extend the result to VT.  Upper bits are ignored.
      if (!is32Bit(VT))
        Result = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Result);
      // Sign-extend from the low bit.
      SDValue ShAmt = DAG.getConstant(VT.getSizeInBits() - 1, MVT::i32);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Result, ShAmt);
      return DAG.getNode(ISD::SRA, DL, VT, Shl, ShAmt);
    }
  }

  // Generic case: emit a SELECT_CCMASK pseudo glued to the comparison.
  SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, MVT::i32),
                   DAG.getConstant(C.CCMask, MVT::i32), Glue};

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
}

// Lower a GlobalAddress node, using PC-relative (LARL-style) addressing
// where possible and a GOT load otherwise.
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();
  Reloc::Model RM = DAG.getTarget().getRelocationModel();
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) {
    // Assign anchors at 1<<12 byte boundaries.
    uint64_t Anchor = Offset & ~uint64_t(0xfff);
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);

    // The offset can be folded into the address if it is aligned to a halfword.
    Offset -= Anchor;
    if (Offset != 0 && (Offset & 1) == 0) {
      // Fold the remaining even offset into a PCREL_OFFSET node.
      SDValue Full = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
      Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
      Offset = 0;
    }
  } else {
    // Not a candidate for PC-relative addressing: load the address
    // from the GOT instead.
    Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
    Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
    Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(), false, false, false, 0);
  }

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
                         DAG.getConstant(Offset, PtrVT));

  return Result;
}

// Emit a call to __tls_get_offset for TLS symbol Node.  Opcode is the
// call node to emit (e.g. TLS_GDCALL or TLS_LDCALL, see the callers) and
// GOTOffset is the GOT offset argument.  The returned SDValue is the
// call's result, copied out of %r2.
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy();
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
                                           Node->getValueType(0),
                                           0, 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}

// Lower a thread-local GlobalAddress according to the TLS model chosen
// for the symbol.
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy();
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  // The high part of the thread pointer is in access register 0.
  SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(0, MVT::i32));
  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);

  // The low part of the thread pointer is in access register 1.
  SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
                             DAG.getConstant(1, MVT::i32));
  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);

  // Merge them into a single 64-bit address.
  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
                                    DAG.getConstant(32, PtrVT));
  SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
  case TLSModel::GeneralDynamic: {
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);

    // The tls_index offset itself is loaded from the constant pool.
    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                         Offset, MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
    break;
  }

  case TLSModel::LocalDynamic: {
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                         Offset, MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset.  Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
      DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // Add the per-symbol offset.
    CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);

    SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
    DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                            DTPOffset, MachinePointerInfo::getConstantPool(),
                            false, false, false, 0);

    Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
                                        SystemZII::MO_INDNTPOFF);
    Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
    Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                         Offset, MachinePointerInfo::getGOT(),
                         false, false, false, 0);
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =
      SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);

    Offset = DAG.getConstantPool(CPV, PtrVT, 8);
    Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
                         Offset, MachinePointerInfo::getConstantPool(),
                         false, false, false, 0);
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
}

// Lower a BlockAddress node using PC-relative addressing.
SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const BlockAddress *BA = Node->getBlockAddress();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy();

  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
  return Result;
}

// Lower a JumpTable node to a PC-relative jump-table address.
SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
                                              SelectionDAG &DAG) const {
  SDLoc DL(JT);
  EVT PtrVT = getPointerTy();
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);

  // Use LARL to load the address of the table.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

// Lower a ConstantPool node to a PC-relative constant-pool address.
SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(CP);
  EVT PtrVT = getPointerTy();

  SDValue Result;
  if (CP->isMachineConstantPoolEntry())
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                       CP->getAlignment());
  else
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                       CP->getAlignment(), CP->getOffset());

  // Use LARL to load the address of the constant pool entry.
  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}

// Lower an i32<->f32 BITCAST by going through the high word of an
// i64/f64 pair, since the value lives in the high half of the register.
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      // Insert the i32 directly into the high word of an undefined GR64.
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      // Without high-word support, extend and shift the value into the
      // high word instead.
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    // Without high-word support, shift the high word down and truncate.
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination");
}

// Lower VASTART by storing the four va_list fields to the given address.
SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
    MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy();

  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), PtrVT),
    DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
    DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
  };

  // Store each field into its respective slot.
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
                              DAG.getIntPtrConstant(Offset));
    MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
                             MachinePointerInfo(SV, Offset),
                             false, false, 0);
    // Each va_list field occupies 8 bytes.
    Offset += 8;
  }
  // Combine all field stores into a single chain result.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

// Lower VACOPY as a memcpy of the whole 32-byte va_list structure.
SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32),
                       /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false,
                       /*isTailCall*/false,
                       MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

// Lower DYNAMIC_STACKALLOC by adjusting the stack pointer directly and
// adding an ADJDYNALLOC placeholder for the (not yet known) size of the
// standard frame and outgoing arguments.
SDValue SystemZTargetLowering::
lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDLoc DL(Op);

  unsigned SPReg = getStackPointerRegisterToSaveRestore();

  // Get a reference to the stack pointer.
  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);

  // Get the new stack pointer value.
  SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size);

  // Copy the new stack pointer back.
  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);

  // The allocated data lives above the 160 bytes allocated for the standard
  // frame, plus any outgoing stack arguments.  We don't know how much that
  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);

  // Return both the allocated address and the updated chain.
  SDValue Ops[2] = { Result, Chain };
  return DAG.getMergeValues(Ops, DL);
}

// Lower SMUL_LOHI: for i32, via a plain 64-bit multiply; for i64, via a
// 128-bit multiply constructed from UMUL_LOHI64.
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else {
    // Do a full 128-bit multiplication based on UMUL_LOHI64:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    SDValue C63 = DAG.getConstant(63, MVT::i64);
    SDValue LL = Op.getOperand(0);
    SDValue RL = Op.getOperand(1);
    // LH/RH are 0 or -1 depending on the sign of LL/RL.
    SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
    SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
    // UMUL_LOHI64 returns the low result in the odd register and the high
    // result in the even register.  SMUL_LOHI is defined to return the
    // low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
                     LL, RL, Ops[1], Ops[0]);
    // Apply the signed-multiply correction term described above.
    SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
    SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
    SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
    Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
  }
  return DAG.getMergeValues(Ops, DL);
}

// Lower UMUL_LOHI: for i32, via a plain 64-bit multiply; for i64, via
// UMUL_LOHI64 on a GR128 pair.
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
                    Op.getOperand(1), Ops[1], Ops[0]);
  else
    // UMUL_LOHI64 returns the low result in the odd register and the high
    // result in the even register.  UMUL_LOHI is defined to return the
    // low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

// Lower SDIVREM using a GR128 DSG(F)-style division, which produces the
// remainder and quotient in an even/odd register pair.
SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Opcode;

  // We use DSGF for 32-bit division.
  if (is32Bit(VT)) {
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
    Opcode = SystemZISD::SDIVREM32;
  } else if (DAG.ComputeNumSignBits(Op1) > 32) {
    // The divisor is known to fit in 32 bits, so DSGF can be used with a
    // truncated divisor.
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
    Opcode = SystemZISD::SDIVREM32;
  } else
    Opcode = SystemZISD::SDIVREM64;

  // DSG(F) takes a 64-bit dividend, so the even register in the GR128
  // input is "don't care".  The instruction returns the remainder in
  // the even register and the quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
                   Op0, Op1, Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

// Lower UDIVREM using a GR128 DL(G)-style division.
SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // DL(G) uses a double-width dividend, so we need to clear the even
  // register in the GR128 input.  The instruction returns the remainder
  // in the even register and the quotient in the odd register.
  SDValue Ops[2];
  if (is32Bit(VT))
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_32, SystemZISD::UDIVREM32,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  else
    lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
                     Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
  return DAG.getMergeValues(Ops, DL);
}

// Lower a 64-bit OR whose operands occupy disjoint 32-bit halves as an
// insertion of the low half into the high half, so that it can become a
// GR32 insertion instead of a full 64-bit OR.
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");

  // Get the known-zero masks for each operand.
  SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
  APInt KnownZero[2], KnownOne[2];
  DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
  DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero.
  // They are the low and high operands respectively.
  uint64_t Masks[] = { KnownZero[0].getZExtValue(),
                       KnownZero[1].getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    // Neither operand fits the pattern; keep the generic OR.
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
    if (!isInt<16>(Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits.  We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(0);
    uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
    if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg.  The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}

// Lower CTPOP using the POPCNT instruction, which produces per-byte bit
// counts, followed by a shift/add tree that combines them.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int64_t OrigBitSize = VT.getSizeInBits();
  SDLoc DL(Op);

  // Get the known-zero mask for the operand.
  Op = Op.getOperand(0);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(Op, KnownZero, KnownOne);
  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
  if (NumSignificantBits == 0)
    // All bits are known zero, so the population count is 0.
    return DAG.getConstant(0, VT);

  // Skip known-zero high parts of the operand.
  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
  BitSize = std::min(BitSize, OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);

  // Add up per-byte counts in a binary tree.  All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
    if (BitSize != OrigBitSize)
      // Mask off bits shifted in above the significant BitSize window.
      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));

  return Op;
}

// Op is an atomic load.  Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  // Reuse the atomic node's memory operand so the access's properties
  // are preserved on the new load.
  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
                        Node->getChain(), Node->getBasePtr(),
                        Node->getMemoryVT(), Node->getMemOperand());
}

// Op is an atomic store.  Lower it into a normal volatile store followed
// by a serialization.
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
                                    Node->getBasePtr(), Node->getMemoryVT(),
                                    Node->getMemOperand());
  // Chain the Serialize instruction after the store.
  return SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), MVT::Other,
                                    Chain), 0);
}

// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // 32-bit operations need no code outside the main loop.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(-Const->getSExtValue(), Src2.getValueType());
    }

  // Get the address of the containing word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, WideVT), BitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance.  (This shift
  // can be folded if the source is constant.)  For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
                       DAG.getConstant(32 - BitSize, WideVT));
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
                       DAG.getConstant(uint32_t(-1) >> BitSize, WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(BitSize, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
                                             NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
                                    DAG.getConstant(BitSize, WideVT));
  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);

  // Return the rotated value together with the atomic op's chain.
  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
  return DAG.getMergeValues(RetOps, DL);
}

// Op is an ATOMIC_LOAD_SUB operation.  Lower 8- and 16-bit operations
// into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
// operations into additions.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = Node->getMemoryVT();
  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
    // A full-width operation.
    assert(Op.getValueType() == MemVT && "Mismatched VTs");
    SDValue Src2 = Node->getVal();
    SDValue NegSrc2;
    SDLoc DL(Src2);

    if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
      // Use an addition if the operand is constant and either LAA(G) is
      // available or the negative value is in the range of A(G)FHI.
      int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
      if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
        NegSrc2 = DAG.getConstant(Value, MemVT);
    } else if (Subtarget.hasInterlockedAccess1())
      // Use LAA(G) if available.
      NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT),
                            Src2);

    if (NegSrc2.getNode())
      // Re-emit as an atomic add of the negated operand.
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
                           Node->getChain(), Node->getBasePtr(), NegSrc2,
                           Node->getMemOperand(), Node->getOrdering(),
                           Node->getSynchScope());

    // Use the node as-is.
    return Op;
  }

  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
}

// Node is an 8- or 16-bit ATOMIC_CMP_SWAP operation.  Lower the first two
// into a fullword ATOMIC_CMP_SWAPW operation.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Op.getNode());

  // We have native support for 32-bit compare and swap.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getOperand(0);
  SDValue Addr = Node->getOperand(1);
  SDValue CmpVal = Node->getOperand(2);
  SDValue SwapVal = Node->getOperand(3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);
  EVT PtrVT = Addr.getValueType();

  // Get the address of the containing word.  Masking with -4 rounds the
  // address down to the enclosing 4-byte-aligned word.
  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
                                    DAG.getConstant(-4, PtrVT));

  // Get the number of bits that the word must be rotated left in order
  // to bring the field to the top bits of a GR32.  (Addr << 3 converts the
  // byte offset within the word into a bit offset.)
  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
                                 DAG.getConstant(3, PtrVT));
  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);

  // Get the complementing shift amount, for rotating a field in the top
  // bits back to its proper position.
  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
                                    DAG.getConstant(0, WideVT), BitShift);

  // Construct the ATOMIC_CMP_SWAPW node.  The loop itself is emitted later
  // by emitAtomicCmpSwapW.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(BitSize, WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
                                             VTList, Ops, NarrowVT, MMO);
  return AtomicOp;
}

// Lower STACKSAVE by reading the stack pointer (R15) and recording that the
// function manipulates the stack pointer.
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
                                              SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
                            SystemZ::R15D, Op.getValueType());
}

// Lower STACKRESTORE by writing the saved value back into R15.
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
                                                 SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
  return DAG.getCopyToReg(Op.getOperand(0), SDLoc(Op),
                          SystemZ::R15D, Op.getOperand(1));
}

// Lower a PREFETCH node either to nothing (instruction prefetch) or to a
// PFD_READ/PFD_WRITE machine intrinsic (data prefetch).
SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  // Operand 4 distinguishes data (1) from instruction (0) prefetch.
  bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
  if (!IsData)
    // Just preserve the chain.
    return Op.getOperand(0);

  // Operand 2 distinguishes writes (1) from reads (0).
  bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
  unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
  auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
  SDValue Ops[] = {
    Op.getOperand(0),
    DAG.getConstant(Code, MVT::i32),
    Op.getOperand(1)
  };
  return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
                                 Node->getVTList(), Ops,
                                 Node->getMemoryVT(), Node->getMemOperand());
}

// Return an i32 that contains the value of CC immediately after After,
// whose final operand must be MVT::Glue.
static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
  // The glue result is always the last value produced by After.
  SDValue Glue = SDValue(After, After->getNumValues() - 1);
  SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
  // Shift the IPM result right so that CC ends up in the low bits.
  return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM,
                     DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
}

// Lower an INTRINSIC_W_CHAIN whose result is the CC value of the underlying
// instruction.  Uses of the original CC result are rewritten in place, so
// there is no new value to return from custom lowering.
SDValue
SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
                                              SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
    SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
    SDValue CC = getCCResult(DAG, Glued.getNode());
    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
    return SDValue();
  }

  return SDValue();
}

// Main dispatch for operations marked as Custom in the constructor.
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
    return lowerATOMIC_STORE(Op, DAG);
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LOAD(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  default:
    llvm_unreachable("Unexpected node to lower");
  }
}

// Return a printable name for each SystemZISD opcode, or null for opcodes
// not listed here.
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch (Opcode) {
    OPCODE(RET_FLAG);
    OPCODE(CALL);
    OPCODE(SIBCALL);
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(IABS);
    OPCODE(ICMP);
    OPCODE(FCMP);
    OPCODE(TM);
    OPCODE(BR_CCMASK);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(EXTRACT_ACCESS);
    OPCODE(UMUL_LOHI64);
    OPCODE(SDIVREM64);
    OPCODE(UDIVREM32);
    OPCODE(UDIVREM64);
    OPCODE(MVC);
    OPCODE(MVC_LOOP);
    OPCODE(NC);
    OPCODE(NC_LOOP);
    OPCODE(OC);
    OPCODE(OC_LOOP);
    OPCODE(XC);
    OPCODE(XC_LOOP);
    OPCODE(CLC);
    OPCODE(CLC_LOOP);
    OPCODE(STRCMP);
    OPCODE(STPCPY);
    OPCODE(SEARCH_STRING);
    OPCODE(IPM);
    OPCODE(SERIALIZE);
    OPCODE(TBEGIN);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(TEND);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(PREFETCH);
  }
  return nullptr;
#undef OPCODE
}

SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  if (Opcode == ISD::SIGN_EXTEND) {
    // Convert (sext (ashr (shl X, C1), C2)) to
    // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
    // cheap as narrower ones.
    SDValue N0 = N->getOperand(0);
    EVT VT = N->getValueType(0);
    if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
      auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      SDValue Inner = N0.getOperand(0);
      if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
        if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
          // Widening adds this many extra high bits; bump both shift
          // amounts by the same amount so the extracted field is unchanged.
          unsigned Extra = (VT.getSizeInBits() -
                            N0.getValueType().getSizeInBits());
          unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
          unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
          EVT ShiftVT = N0.getOperand(1).getValueType();
          SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
                                    Inner.getOperand(0));
          SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
                                    DAG.getConstant(NewShlAmt, ShiftVT));
          return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
                             DAG.getConstant(NewSraAmt, ShiftVT));
        }
      }
    }
  }
  return SDValue();
}

//===----------------------------------------------------------------------===//
// Custom insertion
//===----------------------------------------------------------------------===//

// Create a new basic block after MBB.  The new block is associated with the
// same LLVM IR basic block as MBB.
static MachineBasicBlock *emitBlockAfter(MachineBasicBlock *MBB) {
  MachineFunction &MF = *MBB->getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB->getBasicBlock());
  MF.insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
  return NewMBB;
}

// Split MBB after MI and return the new block (the one that contains
// instructions after MI).
2878 static MachineBasicBlock *splitBlockAfter(MachineInstr *MI, 2879 MachineBasicBlock *MBB) { 2880 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 2881 NewMBB->splice(NewMBB->begin(), MBB, 2882 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 2883 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 2884 return NewMBB; 2885 } 2886 2887 // Split MBB before MI and return the new block (the one that contains MI). 2888 static MachineBasicBlock *splitBlockBefore(MachineInstr *MI, 2889 MachineBasicBlock *MBB) { 2890 MachineBasicBlock *NewMBB = emitBlockAfter(MBB); 2891 NewMBB->splice(NewMBB->begin(), MBB, MI, MBB->end()); 2892 NewMBB->transferSuccessorsAndUpdatePHIs(MBB); 2893 return NewMBB; 2894 } 2895 2896 // Force base value Base into a register before MI. Return the register. 2897 static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, 2898 const SystemZInstrInfo *TII) { 2899 if (Base.isReg()) 2900 return Base.getReg(); 2901 2902 MachineBasicBlock *MBB = MI->getParent(); 2903 MachineFunction &MF = *MBB->getParent(); 2904 MachineRegisterInfo &MRI = MF.getRegInfo(); 2905 2906 unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 2907 BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LA), Reg) 2908 .addOperand(Base).addImm(0).addReg(0); 2909 return Reg; 2910 } 2911 2912 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr *MI,
                                  MachineBasicBlock *MBB) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // Operands of the Select pseudo: result, true value, false value, and the
  // CC valid/condition masks for the branch.
  unsigned DestReg = MI->getOperand(0).getReg();
  unsigned TrueReg = MI->getOperand(1).getReg();
  unsigned FalseReg = MI->getOperand(2).getReg();
  unsigned CCValid = MI->getOperand(3).getImm();
  unsigned CCMask = MI->getOperand(4).getImm();
  DebugLoc DL = MI->getDebugLoc();

  // Diamond-free CFG: StartMBB branches over FalseMBB straight to JoinMBB
  // when the condition holds; otherwise it falls through FalseMBB.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  BuildMI(*MBB, MI, DL, TII->get(SystemZ::PHI), DestReg)
    .addReg(TrueReg).addMBB(StartMBB)
    .addReg(FalseReg).addMBB(FalseMBB);

  MI->eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *
SystemZTargetLowering::emitCondStore(MachineInstr *MI,
                                     MachineBasicBlock *MBB,
                                     unsigned StoreOpcode, unsigned STOCOpcode,
                                     bool Invert) const {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());

  // Operands of the CondStore pseudo: value, base, displacement, index,
  // and the CC valid/condition masks.
  unsigned SrcReg = MI->getOperand(0).getReg();
  MachineOperand Base = MI->getOperand(1);
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned IndexReg = MI->getOperand(3).getReg();
  unsigned CCValid = MI->getOperand(4).getImm();
  unsigned CCMask = MI->getOperand(5).getImm();
  DebugLoc DL = MI->getDebugLoc();

  StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    if (Invert)
      CCMask ^= CCValid;
    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
      .addReg(SrcReg).addOperand(Base).addImm(Disp)
      .addImm(CCValid).addImm(CCMask);
    MI->eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.  (XOR with CCValid
  // inverts the condition within the valid mask.)
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
    .addReg(SrcReg).addOperand(Base).addImm(Disp).addReg(IndexReg);
  MBB->addSuccessor(JoinMBB);

  MI->eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
// or ATOMIC_SWAP{,W} instruction MI.  BinOpcode is the instruction that
// performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
// BitSize is the width of the field in bits, or 0 if this is a partword
// ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
// is one of the operands.  Invert says whether the field should be
// inverted after performing BinOpcode (e.g. for NAND).
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned BinOpcode,
                                            unsigned BitSize,
                                            bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI->getOperand(3));
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = (BinOpcode || IsSubWord ?
                     MRI.createVirtualRegister(RC) : Src2.getReg());
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMMB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMMB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    unsigned Tmp = MRI.createVirtualRegister(RC);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp)
      .addReg(RotatedOldVal).addOperand(Src2);
    if (BitSize <= 32)
      // XILF with the upper BitSize bits set.
      BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
        .addReg(Tmp).addImm(-1U << (32 - BitSize));
    else {
      // Use LCGR and add -1 to the result, which is more compact than
      // an XILF, XILH pair.
      unsigned Tmp2 = MRI.createVirtualRegister(RC);
      BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
      BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
        .addReg(Tmp2).addImm(-1);
    }
  } else if (BinOpcode)
    // A simply binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
      .addReg(RotatedOldVal).addOperand(Src2);
  else if (IsSubWord)
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo
// ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.  BitSize is the width of
// the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned CompareOpcode,
                                            unsigned KeepOldMask,
                                            unsigned BitSize) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  bool IsSubWord = (BitSize < 32);

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned Src2 = MI->getOperand(3).getReg();
  unsigned BitShift = (IsSubWord ? MI->getOperand(4).getReg() : 0);
  unsigned NegBitShift = (IsSubWord ? MI->getOperand(5).getReg() : 0);
  DebugLoc DL = MI->getDebugLoc();
  if (IsSubWord)
    BitSize = MI->getOperand(6).getImm();

  // Subword operations use 32-bit registers.
  const TargetRegisterClass *RC = (BitSize <= 32 ?
                                   &SystemZ::GR32BitRegClass :
                                   &SystemZ::GR64BitRegClass);
  unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
  unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;

  // Get the right opcodes for the displacement.
  LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
  CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned NewVal = MRI.createVirtualRegister(RC);
  unsigned RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
  unsigned RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
  unsigned RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *UseAltMBB = emitBlockAfter(LoopMBB);
  MachineBasicBlock *UpdateMBB = emitBlockAfter(UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal     = L Disp(%Base)
  //   # fall through to LoopMMB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
      .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(UpdateMBB);
  MBB->addSuccessor(UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMMB
  MBB = UseAltMBB;
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
      .addReg(RotatedOldVal).addReg(Src2)
      .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMMB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  if (IsSubWord)
    BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
      .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
    .addReg(OldVal).addReg(NewVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
// instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI,
                                          MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  unsigned Dest = MI->getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI->getOperand(1));
  int64_t Disp = MI->getOperand(2).getImm();
  unsigned OrigCmpVal = MI->getOperand(3).getReg();
  unsigned OrigSwapVal = MI->getOperand(4).getReg();
  unsigned BitShift = MI->getOperand(5).getReg();
  unsigned NegBitShift = MI->getOperand(6).getReg();
  int64_t BitSize = MI->getOperand(7).getImm();
  DebugLoc DL = MI->getDebugLoc();

  const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  unsigned OrigOldVal = MRI.createVirtualRegister(RC);
  unsigned OldVal = MRI.createVirtualRegister(RC);
  unsigned CmpVal = MRI.createVirtualRegister(RC);
  unsigned SwapVal = MRI.createVirtualRegister(RC);
  unsigned StoreVal = MRI.createVirtualRegister(RC);
  unsigned RetryOldVal = MRI.createVirtualRegister(RC);
  unsigned RetryCmpVal = MRI.createVirtualRegister(RC);
  unsigned RetrySwapVal = MRI.createVirtualRegister(RC);

  // Insert 2 basic blocks for the loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
  MachineBasicBlock *SetMBB = emitBlockAfter(LoopMBB);

  //  StartMBB:
  //   ...
  //   %OrigOldVal     = L Disp(%Base)
  //   # fall through to LoopMMB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
    .addOperand(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
  //   %CmpVal        = phi [ %OrigCmpVal, EntryBB ], [ %RetryCmpVal, SetMBB ]
  //   %SwapVal       = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
  //   %Dest          = RLL %OldVal, BitSize(%BitShift)
  //                      ^^ The low BitSize bits contain the field
  //                         of interest.
  //   %RetryCmpVal   = RISBG32 %CmpVal, %Dest, 32, 63-BitSize, 0
  //                      ^^ Replace the upper 32-BitSize bits of the
  //                         comparison value with those that we loaded,
  //                         so that we can use a full word comparison.
  //   CR %Dest, %RetryCmpVal
  //   JNE DoneMBB
  //   # Fall through to SetMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigOldVal).addMBB(StartMBB)
    .addReg(RetryOldVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), CmpVal)
    .addReg(OrigCmpVal).addMBB(StartMBB)
    .addReg(RetryCmpVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
    .addReg(OrigSwapVal).addMBB(StartMBB)
    .addReg(RetrySwapVal).addMBB(SetMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), Dest)
    .addReg(OldVal).addReg(BitShift).addImm(BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetryCmpVal)
    .addReg(CmpVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::CR))
    .addReg(Dest).addReg(RetryCmpVal);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP)
    .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
  MBB->addSuccessor(DoneMBB);
  MBB->addSuccessor(SetMBB);

  //  SetMBB:
  //   %RetrySwapVal = RISBG32 %SwapVal, %Dest, 32, 63-BitSize, 0
  //                     ^^ Replace the upper 32-BitSize bits of the new
  //                        value with those that we loaded.
  //   %StoreVal    = RLL %RetrySwapVal, -BitSize(%NegBitShift)
  //                     ^^ Rotate the new field to its proper position.
  //   %RetryOldVal = CS %Dest, %StoreVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to ExitMMB
  MBB = SetMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
    .addReg(SwapVal).addReg(Dest).addImm(32).addImm(63 - BitSize).addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
    .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
  BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
    .addReg(OldVal).addReg(StoreVal).addOperand(Base).addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI->eraseFromParent();
  return DoneMBB;
}

// Emit an extension from a GR32 or GR64 to a GR128.  ClearEven is true
// if the high register of the GR128 value must be cleared or false if
// it's "don't care".  SubReg is subreg_l32 when extending a GR32
// and subreg_l64 when extending a GR64.
MachineBasicBlock *
SystemZTargetLowering::emitExt128(MachineInstr *MI,
                                  MachineBasicBlock *MBB,
                                  bool ClearEven, unsigned SubReg) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI->getDebugLoc();

  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  unsigned In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);

  // Start from an undefined 128-bit value and insert the pieces below.
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
  if (ClearEven) {
    unsigned NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
    unsigned Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);

    // Materialize zero and insert it into the high 64 bits of the pair.
    BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
      .addImm(0);
    BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
      .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
    In128 = NewIn128;
  }
  // Insert the source value into the low part selected by SubReg.
  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
    .addReg(In128).addReg(Src).addImm(SubReg);

  MI->eraseFromParent();
  return MBB;
}

// Expand a memory-to-memory pseudo (MVC/NC/OC/XC/CLC sequence or loop
// form) into real Opcode instructions.  The pseudo's operands are
// (DestBase, DestDisp, SrcBase, SrcDisp, Length) and, in the loop form,
// a sixth operand holding the 256-byte trip count.  Lengths above 256
// are handled by a loop and/or multiple straight-line instructions,
// each covering at most 256 bytes.  For CLC, all but the last compare
// branch to a common exit block as soon as a difference is found.
MachineBasicBlock *
SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI->getDebugLoc();

  MachineOperand DestBase = earlyUseOperand(MI->getOperand(0));
  uint64_t DestDisp = MI->getOperand(1).getImm();
  MachineOperand SrcBase = earlyUseOperand(MI->getOperand(2));
  uint64_t SrcDisp = MI->getOperand(3).getImm();
  uint64_t Length = MI->getOperand(4).getImm();

  // When generating more than one CLC, all but the last will need to
  // branch to the end when a difference is found.
  MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
                               splitBlockAfter(MI, MBB) : nullptr);

  // Check for the loop form, in which operand 5 is the trip count.
  if (MI->getNumExplicitOperands() > 5) {
    // If source and destination use the same base register, share one
    // address register chain for both.
    bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);

    uint64_t StartCountReg = MI->getOperand(5).getReg();
    uint64_t StartSrcReg = forceReg(MI, SrcBase, TII);
    uint64_t StartDestReg = (HaveSingleBase ? StartSrcReg :
                             forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    uint64_t ThisSrcReg = MRI.createVirtualRegister(RC);
    uint64_t ThisDestReg = (HaveSingleBase ? ThisSrcReg :
                            MRI.createVirtualRegister(RC));
    uint64_t NextSrcReg = MRI.createVirtualRegister(RC);
    uint64_t NextDestReg = (HaveSingleBase ? NextSrcReg :
                            MRI.createVirtualRegister(RC));

    RC = &SystemZ::GR64BitRegClass;
    uint64_t ThisCountReg = MRI.createVirtualRegister(RC);
    uint64_t NextCountReg = MRI.createVirtualRegister(RC);

    MachineBasicBlock *StartMBB = MBB;
    MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
    MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
    // For CLC the loop body needs a separate continuation block so the
    // compare can branch out on inequality; otherwise the loop block
    // continues into itself.
    MachineBasicBlock *NextMBB = (EndMBB ? emitBlockAfter(LoopMBB) : LoopMBB);

    //  StartMBB:
    //   # fall through to LoopMBB
    MBB->addSuccessor(LoopMBB);

    //  LoopMBB:
    //   %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                      [ %NextDestReg, NextMBB ]
    //   %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                     [ %NextSrcReg, NextMBB ]
    //   %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                       [ %NextCountReg, NextMBB ]
    //   ( PFD 2, 768+DestDisp(%ThisDestReg) )
    //   Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    //   ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
      .addReg(StartDestReg).addMBB(StartMBB)
      .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
        .addReg(StartSrcReg).addMBB(StartMBB)
        .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
      .addReg(StartCountReg).addMBB(StartMBB)
      .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      // Prefetch for write three 256-byte blocks ahead of the store.
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
        .addImm(SystemZ::PFD_WRITE)
        .addReg(ThisDestReg).addImm(DestDisp + 768).addReg(0);
    BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(ThisDestReg).addImm(DestDisp).addImm(256)
      .addReg(ThisSrcReg).addImm(SrcDisp);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    //  NextMBB:
    //   %NextDestReg = LA 256(%ThisDestReg)
    //   %NextSrcReg = LA 256(%ThisSrcReg)
    //   %NextCountReg = AGHI %ThisCountReg, -1
    //   CGHI %NextCountReg, 0
    //   JLH LoopMBB
    //   # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;

    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
      .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
        .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
      .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
      .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    // The remainder (Length & 255 bytes) is handled by the straight-line
    // code below, addressed off the final Next*Reg values.
    DestBase = MachineOperand::CreateReg(NextDestReg, false);
    SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
    Length &= 255;
    MBB = DoneMBB;
  }
  // Handle any remaining bytes with straight-line code.
  while (Length > 0) {
    uint64_t ThisLength = std::min(Length, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LAY if so.
    if (!isUInt<12>(DestDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .addOperand(DestBase).addImm(DestDisp).addReg(0);
      DestBase = MachineOperand::CreateReg(Reg, false);
      DestDisp = 0;
    }
    if (!isUInt<12>(SrcDisp)) {
      unsigned Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(SystemZ::LAY), Reg)
        .addOperand(SrcBase).addImm(SrcDisp).addReg(0);
      SrcBase = MachineOperand::CreateReg(Reg, false);
      SrcDisp = 0;
    }
    BuildMI(*MBB, MI, DL, TII->get(Opcode))
      .addOperand(DestBase).addImm(DestDisp).addImm(ThisLength)
      .addOperand(SrcBase).addImm(SrcDisp);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    Length -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && Length > 0) {
      MachineBasicBlock *NextMBB = splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    // CC from the last CLC is consumed after the join point.
    MBB->addLiveIn(SystemZ::CC);
  }

  MI->eraseFromParent();
  return MBB;
}

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *
SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
                                         MachineBasicBlock *MBB,
                                         unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI->getDebugLoc();

  // Pseudo operands: the resulting end address, the two start addresses,
  // and the character register that the string instruction implicitly
  // reads via R0L.
  uint64_t End1Reg = MI->getOperand(0).getReg();
  uint64_t Start1Reg = MI->getOperand(1).getReg();
  uint64_t Start2Reg = MI->getOperand(2).getReg();
  uint64_t CharReg = MI->getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);

  //  StartMBB:
  //   # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  //  LoopMBB:
  //   %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  //   %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  //   R0L = %CharReg
  //   %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  //   JO LoopMBB
  //   # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
    .addReg(Start1Reg).addMBB(StartMBB)
    .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
    .addReg(Start2Reg).addMBB(StartMBB)
    .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
    .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
    .addReg(This1Reg).addReg(This2Reg);
  // CC == 3 means the instruction stopped before completing; loop until
  // CC != 3 (see the comment above this function).
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  // The final CC value is consumed after the loop.
  DoneMBB->addLiveIn(SystemZ::CC);

  MI->eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
// Opcode is the real opcode (TBEGIN or TBEGINC) replacing the pseudo;
// NoFloat suppresses the FPR clobber list.
MachineBasicBlock *
SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
                                            MachineBasicBlock *MBB,
                                            unsigned Opcode,
                                            bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI->setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI->getOperand(2).getImm();
  // GRSM save-mask bit for each of GPR0-GPR15; each bit covers a
  // register pair, hence the duplicated entries.
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  // Always preserve the stack pointer (R15)...
  Control |= GPRControlBit[15];
  // ...and the frame pointer (R11) when a frame pointer is in use.
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI->getOperand(2).setImm(Control);

  // Add GPR clobbers: every GPR whose save-mask bit is clear may be
  // clobbered on transaction abort.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR clobbers when the AR/FPR-allowed control bit is set and the
  // caller has not requested the no-float form.
  if (!NoFloat && (Control & 4) != 0) {
    for (int I = 0; I < 16; I++) {
      unsigned Reg = SystemZMC::FP64Regs[I];
      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  return MBB;
}

// Dispatch table mapping each custom-inserted pseudo opcode to the
// emit* helper that expands it.
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
  switch (MI->getOpcode()) {
  // Conditional selects.
  case SystemZ::Select32Mux:
  case SystemZ::Select32:
  case SystemZ::SelectF32:
  case SystemZ::Select64:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
    return emitSelect(MI, MBB);

  // Conditional stores; the "Inv" forms store on the inverted condition.
  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  // 32/64-bit to 128-bit extensions.
  case SystemZ::AEXT128_64:
    return emitExt128(MI, MBB, false, SystemZ::subreg_l64);
  case SystemZ::ZEXT128_32:
    return emitExt128(MI, MBB, true, SystemZ::subreg_l32);
  case SystemZ::ZEXT128_64:
    return emitExt128(MI, MBB, true, SystemZ::subreg_l64);

  // Atomic swaps (binary opcode 0 means plain exchange).
  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0, 0);
  case SystemZ::ATOMIC_SWAP_32:
    return emitAtomicLoadBinary(MI, MBB, 0, 32);
  case SystemZ::ATOMIC_SWAP_64:
    return emitAtomicLoadBinary(MI, MBB, 0, 64);

  // Atomic add.
  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
  case SystemZ::ATOMIC_LOAD_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
  case SystemZ::ATOMIC_LOAD_AHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
  case SystemZ::ATOMIC_LOAD_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
  case SystemZ::ATOMIC_LOAD_AGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
  case SystemZ::ATOMIC_LOAD_AGHI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
  case SystemZ::ATOMIC_LOAD_AGFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);

  // Atomic subtract.
  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
  case SystemZ::ATOMIC_LOAD_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
  case SystemZ::ATOMIC_LOAD_SGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);

  // Atomic AND.
  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
  case SystemZ::ATOMIC_LOAD_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
  case SystemZ::ATOMIC_LOAD_NILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
  case SystemZ::ATOMIC_LOAD_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
  case SystemZ::ATOMIC_LOAD_NILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
  case SystemZ::ATOMIC_LOAD_NGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
  case SystemZ::ATOMIC_LOAD_NILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
  case SystemZ::ATOMIC_LOAD_NILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
  case SystemZ::ATOMIC_LOAD_NIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
  case SystemZ::ATOMIC_LOAD_NIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
  case SystemZ::ATOMIC_LOAD_NILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
  case SystemZ::ATOMIC_LOAD_NIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);

  // Atomic OR.
  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
  case SystemZ::ATOMIC_LOAD_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
  case SystemZ::ATOMIC_LOAD_OILL:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
  case SystemZ::ATOMIC_LOAD_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
  case SystemZ::ATOMIC_LOAD_OILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
  case SystemZ::ATOMIC_LOAD_OGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
  case SystemZ::ATOMIC_LOAD_OILL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
  case SystemZ::ATOMIC_LOAD_OILH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
  case SystemZ::ATOMIC_LOAD_OIHL64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
  case SystemZ::ATOMIC_LOAD_OIHH64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
  case SystemZ::ATOMIC_LOAD_OILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
  case SystemZ::ATOMIC_LOAD_OIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);

  // Atomic XOR.
  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
  case SystemZ::ATOMIC_LOAD_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
  case SystemZ::ATOMIC_LOAD_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
  case SystemZ::ATOMIC_LOAD_XGR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
  case SystemZ::ATOMIC_LOAD_XILF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
  case SystemZ::ATOMIC_LOAD_XIHF64:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);

  // Atomic NAND ("i" forms: invert the result of the AND).
  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
  case SystemZ::ATOMIC_LOAD_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
  case SystemZ::ATOMIC_LOAD_NILLi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
  case SystemZ::ATOMIC_LOAD_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
  case SystemZ::ATOMIC_LOAD_NILFi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
  case SystemZ::ATOMIC_LOAD_NGRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
  case SystemZ::ATOMIC_LOAD_NILL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHL64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHH64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
  case SystemZ::ATOMIC_LOAD_NILF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
  case SystemZ::ATOMIC_LOAD_NIHF64i:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);

  // Atomic min/max, signed then unsigned.
  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_MIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_MIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_MAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_MAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 0);
  case SystemZ::ATOMIC_LOAD_UMIN_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_LE, 32);
  case SystemZ::ATOMIC_LOAD_UMIN_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_LE, 64);

  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 0);
  case SystemZ::ATOMIC_LOAD_UMAX_32:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
                                SystemZ::CCMASK_CMP_GE, 32);
  case SystemZ::ATOMIC_LOAD_UMAX_64:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
                                SystemZ::CCMASK_CMP_GE, 64);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);

  // Memory-to-memory block operations and string operations.
  case SystemZ::MVCSequence:
  case SystemZ::MVCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCSequence:
  case SystemZ::NCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCSequence:
  case SystemZ::OCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCSequence:
  case SystemZ::XCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCSequence:
  case SystemZ::CLCLoop:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);

  // Transactional-execution begin pseudos.
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}