//===- InstCombineCalls.cpp -----------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitCall and visitInvoke functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

STATISTIC(NumSimplified, "Number of library calls simplified");

/// getPromotedType - Return the specified type promoted as it would be to pass
/// through a va_arg area.
static Type *getPromotedType(Type *Ty) {
  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
    if (ITy->getBitWidth() < 32)
      return Type::getInt32Ty(Ty->getContext());
  }
  return Ty;
}

/// reduceToSingleValueType - Given an aggregate type which ultimately holds a
/// single scalar element, like {{{type}}} or [1 x type], return type.
static Type *reduceToSingleValueType(Type *T) {
  while (!T->isSingleValueType()) {
    if (StructType *STy = dyn_cast<StructType>(T)) {
      if (STy->getNumElements() == 1)
        T = STy->getElementType(0);
      else
        break;
    } else if (ArrayType *ATy = dyn_cast<ArrayType>(T)) {
      if (ATy->getNumElements() == 1)
        T = ATy->getElementType();
      else
        break;
    } else
      break;
  }

  return T;
}

Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
  unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL);
  unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL);
  unsigned MinAlign = std::min(DstAlign, SrcAlign);
  unsigned CopyAlign = MI->getAlignment();

  if (CopyAlign < MinAlign) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      MinAlign, false));
    return MI;
  }

  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
  // load/store.
  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
  if (!MemOpLength) return nullptr;

  // Source and destination pointer types are always "i8*" for intrinsic.  See
  // if the size is something we can handle with a single primitive load/store.
  // A single load+store correctly handles overlapping memory in the memmove
  // case.
  uint64_t Size = MemOpLength->getLimitedValue();
  assert(Size && "0-sized memory transferring should be removed already.");

  if (Size > 8 || (Size&(Size-1)))
    return nullptr;  // If not 1/2/4/8 bytes, exit.

  // Use an integer load+store unless we can find something better.
  unsigned SrcAddrSp =
    cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
  unsigned DstAddrSp =
    cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();

  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);

  // Memcpy forces the use of i8* for the source and destination.  That means
  // that if you're using memcpy to move one double around, you'll get a cast
  // from double* to i8*.  We'd much rather use a double load+store than an
  // i64 load+store here, because this improves the odds that the source or
  // dest address will be promotable.  See if we can find a better type than
  // the integer datatype.
  Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
  MDNode *CopyMD = nullptr;
  if (StrippedDest != MI->getArgOperand(0)) {
    Type *SrcETy = cast<PointerType>(StrippedDest->getType())
                                    ->getElementType();
    if (DL && SrcETy->isSized() && DL->getTypeStoreSize(SrcETy) == Size) {
      // The SrcETy might be something like {{{double}}} or [1 x double].  Rip
      // down through these levels if so.
      SrcETy = reduceToSingleValueType(SrcETy);

      if (SrcETy->isSingleValueType()) {
        NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
        NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);

        // If the memcpy has metadata describing the members, see if we can
        // get the TBAA tag describing our copy.
        if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
          if (M->getNumOperands() == 3 &&
              M->getOperand(0) &&
              isa<ConstantInt>(M->getOperand(0)) &&
              cast<ConstantInt>(M->getOperand(0))->isNullValue() &&
              M->getOperand(1) &&
              isa<ConstantInt>(M->getOperand(1)) &&
              cast<ConstantInt>(M->getOperand(1))->getValue() == Size &&
              M->getOperand(2) &&
              isa<MDNode>(M->getOperand(2)))
            CopyMD = cast<MDNode>(M->getOperand(2));
        }
      }
    }
  }

  // If the memcpy/memmove provides better alignment info than we can
  // infer, use it.
  SrcAlign = std::max(SrcAlign, CopyAlign);
  DstAlign = std::max(DstAlign, CopyAlign);

  Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
  Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
  LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
  L->setAlignment(SrcAlign);
  if (CopyMD)
    L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
  StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
  S->setAlignment(DstAlign);
  if (CopyMD)
    S->setMetadata(LLVMContext::MD_tbaa, CopyMD);

  // Set the size of the copy to 0, it will be deleted on the next iteration.
  MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
  return MI;
}

Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
  unsigned Alignment = getKnownAlignment(MI->getDest(), DL);
  if (MI->getAlignment() < Alignment) {
    MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
                                      Alignment, false));
    return MI;
  }

  // Extract the length and alignment and fill if they are constant.
  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
    return nullptr;
  uint64_t Len = LenC->getLimitedValue();
  Alignment = MI->getAlignment();
  assert(Len && "0-sized memory setting should be removed already.");

  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
    Type *ITy = IntegerType::get(MI->getContext(), Len*8);  // n=1 -> i8.
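    // e.g. memset(p, 0xAB, 4) becomes a single i32 store of 0xABABABAB,
    // built by replicating the byte fill value below.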

    Value *Dest = MI->getDest();
    unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
    Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
    Dest = Builder->CreateBitCast(Dest, NewDstPtrTy);

    // Alignment 0 is identity for alignment 1 for memset, but not store.
    if (Alignment == 0) Alignment = 1;

    // Extract the fill value and store.
    uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
    StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
                                        MI->isVolatile());
    S->setAlignment(Alignment);

    // Set the size of the copy to 0, it will be deleted on the next iteration.
    MI->setLength(Constant::getNullValue(LenC->getType()));
    return MI;
  }

  return nullptr;
}

/// visitCallInst - CallInst simplification.  This mostly only handles folding
/// of intrinsic instructions.  For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  if (isFreeCall(&CI, TLI))
    return visitFree(CI);

  // If the caller function is nounwind, mark the call as nounwind, even if the
  // callee isn't.
  if (CI.getParent()->getParent()->doesNotThrow() &&
      !CI.doesNotThrow()) {
    CI.setDoesNotThrow();
    return &CI;
  }

  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
  if (!II) return visitCallSite(&CI);

  // Intrinsics cannot occur in an invoke, so handle them here instead of in
  // visitCallSite.
  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
    bool Changed = false;

    // memmove/cpy/set of zero bytes is a noop.
    if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
      if (NumBytes->isNullValue())
        return EraseInstFromFunction(CI);

      if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
        if (CI->getZExtValue() == 1) {
          // Replace the instruction with just byte operations.  We would
          // transform other cases to loads/stores, but we don't know if
          // alignment is sufficient.
        }
    }

    // No other transformations apply to volatile transfers.
    if (MI->isVolatile())
      return nullptr;

    // If we have a memmove and the source operation is a constant global,
    // then the source and dest pointers can't alias, so we can change this
    // into a call to memcpy.
    if (MemMoveInst *MMI = dyn_cast<MemMoveInst>(MI)) {
      if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
        if (GVSrc->isConstant()) {
          Module *M = CI.getParent()->getParent()->getParent();
          Intrinsic::ID MemCpyID = Intrinsic::memcpy;
          Type *Tys[3] = { CI.getArgOperand(0)->getType(),
                           CI.getArgOperand(1)->getType(),
                           CI.getArgOperand(2)->getType() };
          CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
          Changed = true;
        }
    }

    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
      // memmove(x,x,size) -> noop.
      if (MTI->getSource() == MTI->getDest())
        return EraseInstFromFunction(CI);
    }

    // If we can determine a pointer alignment that is bigger than currently
    // set, update the alignment.
    if (isa<MemTransferInst>(MI)) {
      if (Instruction *I = SimplifyMemTransfer(MI))
        return I;
    } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(MI)) {
      if (Instruction *I = SimplifyMemSet(MSI))
        return I;
    }

    if (Changed) return II;
  }

  switch (II->getIntrinsicID()) {
  default: break;
  case Intrinsic::objectsize: {
    uint64_t Size;
    if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
      return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
    return nullptr;
  }
  case Intrinsic::bswap: {
    Value *IIOperand = II->getArgOperand(0);
    Value *X = nullptr;

    // bswap(bswap(x)) -> x
    if (match(IIOperand, m_BSwap(m_Value(X))))
      return ReplaceInstUsesWith(CI, X);

    // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
    if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
      unsigned C = X->getType()->getPrimitiveSizeInBits() -
        IIOperand->getType()->getPrimitiveSizeInBits();
      Value *CV = ConstantInt::get(X->getType(), C);
      Value *V = Builder->CreateLShr(X, CV);
      return new TruncInst(V, IIOperand->getType());
    }
    break;
  }

  case Intrinsic::powi:
    if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // powi(x, 0) -> 1.0
      if (Power->isZero())
        return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
      // powi(x, 1) -> x
      if (Power->isOne())
        return ReplaceInstUsesWith(CI, II->getArgOperand(0));
      // powi(x, -1) -> 1/x
      if (Power->isAllOnesValue())
        return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
                                          II->getArgOperand(0));
    }
    break;
  case Intrinsic::cttz: {
    // If all bits below the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, TrailingZeros)));

  }
  break;
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
    IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
    // FIXME: Try to simplify vectors of integers.
    if (!IT) break;
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
    if ((Mask & KnownZero) == Mask)
      return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
                                 APInt(BitWidth, LeadingZeros)));

  }
  break;
  case Intrinsic::uadd_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
    bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
    bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];

    if (LHSKnownNegative || LHSKnownPositive) {
      APInt RHSKnownZero(BitWidth, 0);
      APInt RHSKnownOne(BitWidth, 0);
      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
      bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
      bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
      if (LHSKnownNegative && RHSKnownNegative) {
        // The sign bit is set in both cases: this MUST overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getTrue(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }

      if (LHSKnownPositive && RHSKnownPositive) {
        // The sign bit is clear in both cases: this CANNOT overflow.
        // Create a simple add instruction, and insert it into the struct.
        Value *Add = Builder->CreateNUWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {
          UndefValue::get(LHS->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }
  }
  // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X + undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X + 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }

    // We can strength reduce this signed add into a regular add if we
    // can prove that it will never overflow.
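    // For example, when the operands are known not to wrap, a call such as
    //   %r = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 1)
    // becomes an "add nsw" plus a struct whose overflow flag is false.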
    if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) {
      Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
      if (WillNotOverflowSignedAdd(LHS, RHS)) {
        Value *Add = Builder->CreateNSWAdd(LHS, RHS);
        Add->takeName(&CI);
        Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()};
        StructType *ST = cast<StructType>(II->getType());
        Constant *Struct = ConstantStruct::get(ST, V);
        return InsertValueInst::Create(Struct, Add, 0);
      }
    }

    break;
  case Intrinsic::usub_with_overflow:
  case Intrinsic::ssub_with_overflow:
    // undef - X -> undef
    // X - undef -> undef
    if (isa<UndefValue>(II->getArgOperand(0)) ||
        isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X - 0 -> {X, false}
      if (RHS->isZero()) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::umul_with_overflow: {
    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();

    APInt LHSKnownZero(BitWidth, 0);
    APInt LHSKnownOne(BitWidth, 0);
    computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
    APInt RHSKnownZero(BitWidth, 0);
    APInt RHSKnownOne(BitWidth, 0);
    computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);

    // Get the largest possible values for each operand.
    APInt LHSMax = ~LHSKnownZero;
    APInt RHSMax = ~RHSKnownZero;

    // If multiplying the maximum values does not overflow then we can turn
    // this into a plain NUW mul.
    bool Overflow;
    LHSMax.umul_ov(RHSMax, Overflow);
    if (!Overflow) {
      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
      Constant *V[] = {
        UndefValue::get(LHS->getType()),
        Builder->getFalse()
      };
      Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
      return InsertValueInst::Create(Struct, Mul, 0);
    }
  } // FALL THROUGH
  case Intrinsic::smul_with_overflow:
    // Canonicalize constants into the RHS.
    if (isa<Constant>(II->getArgOperand(0)) &&
        !isa<Constant>(II->getArgOperand(1))) {
      Value *LHS = II->getArgOperand(0);
      II->setArgOperand(0, II->getArgOperand(1));
      II->setArgOperand(1, LHS);
      return II;
    }

    // X * undef -> undef
    if (isa<UndefValue>(II->getArgOperand(1)))
      return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));

    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
      // X*0 -> {0, false}
      if (RHSI->isZero())
        return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));

      // X * 1 -> {X, false}
      if (RHSI->equalsInt(1)) {
        Constant *V[] = {
          UndefValue::get(II->getArgOperand(0)->getType()),
          ConstantInt::getFalse(II->getContext())
        };
        Constant *Struct =
          ConstantStruct::get(cast<StructType>(II->getType()), V);
        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
      }
    }
    break;
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
    // Turn PPC lvx -> load if the pointer is known aligned.
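    // (The lvx instruction ignores the low address bits, so this rewrite is
    // only safe once the pointer is known to be 16-byte aligned.)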
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
      return new LoadInst(Ptr);
    }
    break;
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
    // Turn stvx -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(0)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
      return new StoreInst(II->getArgOperand(0), Ptr);
    }
    break;
  case Intrinsic::x86_sse_storeu_ps:
  case Intrinsic::x86_sse2_storeu_pd:
  case Intrinsic::x86_sse2_storeu_dq:
    // Turn X86 storeu -> store if the pointer is known aligned.
    if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL) >= 16) {
      Type *OpPtrTy =
        PointerType::getUnqual(II->getArgOperand(1)->getType());
      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
      return new StoreInst(II->getArgOperand(1), Ptr);
    }
    break;

  case Intrinsic::x86_sse_cvtss2si:
  case Intrinsic::x86_sse_cvtss2si64:
  case Intrinsic::x86_sse_cvttss2si:
  case Intrinsic::x86_sse_cvttss2si64:
  case Intrinsic::x86_sse2_cvtsd2si:
  case Intrinsic::x86_sse2_cvtsd2si64:
  case Intrinsic::x86_sse2_cvttsd2si:
  case Intrinsic::x86_sse2_cvttsd2si64: {
    // These intrinsics only demand the 0th element of their input vectors. If
    // we can simplify the input based on that, do so now.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    APInt DemandedElts(VWidth, 1);
    APInt UndefElts(VWidth, 0);
    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                              DemandedElts, UndefElts)) {
      II->setArgOperand(0, V);
      return II;
    }
    break;
  }

  // Constant fold <A x Bi> << Ci.
  // FIXME: We don't handle _dq because it's a shift of an i128, but is
  // represented in the IR as <2 x i64>. A per element shift is wrong.
  case Intrinsic::x86_sse2_psll_d:
  case Intrinsic::x86_sse2_psll_q:
  case Intrinsic::x86_sse2_psll_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_avx2_psll_d:
  case Intrinsic::x86_avx2_psll_q:
  case Intrinsic::x86_avx2_psll_w:
  case Intrinsic::x86_avx2_pslli_d:
  case Intrinsic::x86_avx2_pslli_q:
  case Intrinsic::x86_avx2_pslli_w:
  case Intrinsic::x86_sse2_psrl_d:
  case Intrinsic::x86_sse2_psrl_q:
  case Intrinsic::x86_sse2_psrl_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_avx2_psrl_d:
  case Intrinsic::x86_avx2_psrl_q:
  case Intrinsic::x86_avx2_psrl_w:
  case Intrinsic::x86_avx2_psrli_d:
  case Intrinsic::x86_avx2_psrli_q:
  case Intrinsic::x86_avx2_psrli_w: {
    // Simplify if count is constant. To 0 if >= BitWidth,
    // otherwise to shl/lshr.
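    // For example, a psrli.d by 33 on <4 x i32> folds to zeroinitializer,
    // while a psrli.d by 3 becomes a vector lshr by a splat of 3.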
    auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
    auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
    if (!CDV && !CInt)
      break;
    ConstantInt *Count;
    if (CDV)
      Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
    else
      Count = CInt;

    auto Vec = II->getArgOperand(0);
    auto VT = cast<VectorType>(Vec->getType());
    if (Count->getZExtValue() >
        VT->getElementType()->getPrimitiveSizeInBits() - 1)
      return ReplaceInstUsesWith(
          CI, ConstantAggregateZero::get(Vec->getType()));

    bool isPackedShiftLeft = true;
    switch (II->getIntrinsicID()) {
    default : break;
    case Intrinsic::x86_sse2_psrl_d:
    case Intrinsic::x86_sse2_psrl_q:
    case Intrinsic::x86_sse2_psrl_w:
    case Intrinsic::x86_sse2_psrli_d:
    case Intrinsic::x86_sse2_psrli_q:
    case Intrinsic::x86_sse2_psrli_w:
    case Intrinsic::x86_avx2_psrl_d:
    case Intrinsic::x86_avx2_psrl_q:
    case Intrinsic::x86_avx2_psrl_w:
    case Intrinsic::x86_avx2_psrli_d:
    case Intrinsic::x86_avx2_psrli_q:
    case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
    }

    unsigned VWidth = VT->getNumElements();
    // Get a constant vector of the same type as the first operand.
    auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
    if (isPackedShiftLeft)
      return BinaryOperator::CreateShl(Vec,
          Builder->CreateVectorSplat(VWidth, VTCI));

    return BinaryOperator::CreateLShr(Vec,
        Builder->CreateVectorSplat(VWidth, VTCI));
  }

  case Intrinsic::x86_sse41_pmovsxbw:
  case Intrinsic::x86_sse41_pmovsxwd:
  case Intrinsic::x86_sse41_pmovsxdq:
  case Intrinsic::x86_sse41_pmovzxbw:
  case Intrinsic::x86_sse41_pmovzxwd:
  case Intrinsic::x86_sse41_pmovzxdq: {
    // pmov{s|z}x ignores the upper half of its input vector.
    unsigned VWidth =
      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
    unsigned LowHalfElts = VWidth / 2;
    APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts));
    APInt UndefElts(VWidth, 0);
    if (Value *TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0),
                                                 InputDemandedElts,
                                                 UndefElts)) {
      II->setArgOperand(0, TmpV);
      return II;
    }
    break;
  }

  case Intrinsic::x86_sse4a_insertqi: {
    // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
    // ones undef
    // TODO: eventually we should lower this intrinsic to IR
    if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
      if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
        if (CIWidth->equalsInt(64) && CIStart->isZero()) {
          Value *Vec = II->getArgOperand(1);
          Value *Undef = UndefValue::get(Vec->getType());
          const uint32_t Mask[] = { 0, 2 };
          return ReplaceInstUsesWith(
              CI,
              Builder->CreateShuffleVector(
                  Vec, Undef, ConstantDataVector::get(
                                  II->getContext(), ArrayRef<uint32_t>(Mask))));

        } else if (auto Source =
                       dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
          if (Source->hasOneUse() &&
              Source->getArgOperand(1) == II->getArgOperand(1)) {
            // If the source of the insert has only one use and it's another
            // insert (and they're both inserting from the same vector), try to
            // bundle both together.
            auto CISourceWidth =
                dyn_cast<ConstantInt>(Source->getArgOperand(2));
            auto CISourceStart =
                dyn_cast<ConstantInt>(Source->getArgOperand(3));
            if (CISourceStart && CISourceWidth) {
              unsigned Start = CIStart->getZExtValue();
              unsigned Width = CIWidth->getZExtValue();
              unsigned End = Start + Width;
              unsigned SourceStart = CISourceStart->getZExtValue();
              unsigned SourceWidth = CISourceWidth->getZExtValue();
              unsigned SourceEnd = SourceStart + SourceWidth;
              unsigned NewStart, NewWidth;
              bool ShouldReplace = false;
              if (Start <= SourceStart && SourceStart <= End) {
                NewStart = Start;
                NewWidth = std::max(End, SourceEnd) - NewStart;
                ShouldReplace = true;
              } else if (SourceStart <= Start && Start <= SourceEnd) {
                NewStart = SourceStart;
                NewWidth = std::max(SourceEnd, End) - NewStart;
                ShouldReplace = true;
              }

              if (ShouldReplace) {
                Constant *ConstantWidth = ConstantInt::get(
                    II->getArgOperand(2)->getType(), NewWidth, false);
                Constant *ConstantStart = ConstantInt::get(
                    II->getArgOperand(3)->getType(), NewStart, false);
                Value *Args[4] = { Source->getArgOperand(0),
                                   II->getArgOperand(1), ConstantWidth,
                                   ConstantStart };
                Module *M = CI.getParent()->getParent()->getParent();
                Value *F =
                    Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
                return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
              }
            }
          }
        }
      }
    }
    break;
  }

  case Intrinsic::x86_sse41_pblendvb:
  case Intrinsic::x86_sse41_blendvps:
  case Intrinsic::x86_sse41_blendvpd:
  case Intrinsic::x86_avx_blendv_ps_256:
  case Intrinsic::x86_avx_blendv_pd_256:
  case Intrinsic::x86_avx2_pblendvb: {
    // Convert blendv* to vector selects if the mask is constant.
    // This optimization is convoluted because the intrinsic is defined as
    // getting a vector of floats or doubles for the ps and pd versions.
    // FIXME: That should be changed.
    Value *Mask = II->getArgOperand(2);
    if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
      auto Tyi1 = Builder->getInt1Ty();
      auto SelectorType = cast<VectorType>(Mask->getType());
      auto EltTy = SelectorType->getElementType();
      unsigned Size = SelectorType->getNumElements();
      unsigned BitWidth =
          EltTy->isFloatTy()
              ? 32
              : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
      assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
             "Wrong arguments for variable blend intrinsic");
      SmallVector<Constant *, 32> Selectors;
      for (unsigned I = 0; I < Size; ++I) {
        // The intrinsics only read the top bit
        uint64_t Selector;
        if (BitWidth == 8)
          Selector = C->getElementAsInteger(I);
        else
          Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
        Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
      }
      auto NewSelector = ConstantVector::get(Selectors);
      return SelectInst::Create(NewSelector, II->getArgOperand(1),
                                II->getArgOperand(0), "blendv");
    } else {
      break;
    }
  }

  case Intrinsic::x86_avx_vpermilvar_ps:
  case Intrinsic::x86_avx_vpermilvar_ps_256:
  case Intrinsic::x86_avx_vpermilvar_pd:
  case Intrinsic::x86_avx_vpermilvar_pd_256: {
    // Convert vpermil* to shufflevector if the mask is constant.
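    // Each constant mask element selects a lane of the first operand, so the
    // mask can be translated directly into shufflevector indices.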
    Value *V = II->getArgOperand(1);
    unsigned Size = cast<VectorType>(V->getType())->getNumElements();
    assert(Size == 8 || Size == 4 || Size == 2);
    uint32_t Indexes[8];
    if (auto C = dyn_cast<ConstantDataVector>(V)) {
      // The intrinsics only read one or two bits, clear the rest.
      for (unsigned I = 0; I < Size; ++I) {
        uint32_t Index = C->getElementAsInteger(I) & 0x3;
        if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
            II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
          Index >>= 1;
        Indexes[I] = Index;
      }
    } else if (isa<ConstantAggregateZero>(V)) {
      for (unsigned I = 0; I < Size; ++I)
        Indexes[I] = 0;
    } else {
      break;
    }
    // The _256 variants are a bit trickier since the mask bits always index
    // into the corresponding 128-bit half. In order to convert to a generic
    // shuffle, we have to make that explicit.
    if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
        II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
      for (unsigned I = Size / 2; I < Size; ++I)
        Indexes[I] += Size / 2;
    }
    auto NewC =
        ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
    auto V1 = II->getArgOperand(0);
    auto V2 = UndefValue::get(V1->getType());
    auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
    return ReplaceInstUsesWith(CI, Shuffle);
  }

  case Intrinsic::ppc_altivec_vperm:
    // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
    // Note that ppc_altivec_vperm has a big-endian bias, so when creating
    // a vector shuffle for little endian, we must undo the transformation
    // performed on vec_perm in altivec.h. That is, we must complement
    // the permutation mask with respect to 31 and reverse the order of
    // V1 and V2.
    if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
      assert(Mask->getType()->getVectorNumElements() == 16 &&
             "Bad type for intrinsic!");

      // Check that all of the elements are integer constants or undefs.
      bool AllEltsOk = true;
      for (unsigned i = 0; i != 16; ++i) {
        Constant *Elt = Mask->getAggregateElement(i);
        if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
          AllEltsOk = false;
          break;
        }
      }

      if (AllEltsOk) {
        // Cast the input vectors to byte vectors.
        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
                                            Mask->getType());
        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
                                            Mask->getType());
        Value *Result = UndefValue::get(Op0->getType());

        // Only extract each element once.
        Value *ExtractedElts[32];
        memset(ExtractedElts, 0, sizeof(ExtractedElts));

        for (unsigned i = 0; i != 16; ++i) {
          if (isa<UndefValue>(Mask->getAggregateElement(i)))
            continue;
          unsigned Idx =
            cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
          Idx &= 31;  // Match the hardware behavior.
          if (DL && DL->isLittleEndian())
            Idx = 31 - Idx;

          if (!ExtractedElts[Idx]) {
            Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0;
            Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1;
            ExtractedElts[Idx] =
              Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
                                            Builder->getInt32(Idx&15));
          }

          // Insert this value into the result vector.
          Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
                                                Builder->getInt32(i));
        }
        return CastInst::Create(Instruction::BitCast, Result, CI.getType());
      }
    }
    break;

  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane:
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL);
    unsigned AlignArg = II->getNumArgOperands() - 1;
    ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
    if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
      II->setArgOperand(AlignArg,
                        ConstantInt::get(Type::getInt32Ty(II->getContext()),
                                         MemAlign, false));
      return II;
    }
    break;
  }

  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu:
  case Intrinsic::aarch64_neon_smull:
  case Intrinsic::aarch64_neon_umull: {
    Value *Arg0 = II->getArgOperand(0);
    Value *Arg1 = II->getArgOperand(1);

    // Handle mul by zero first:
    if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
      return ReplaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
    }

    // Check for constant LHS & RHS - in this case we just simplify.
    bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
                 II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
    VectorType *NewVT = cast<VectorType>(II->getType());
    if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
      if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
        CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
        CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);

        return ReplaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
      }

      // Couldn't simplify - canonicalize constant to the RHS.
      std::swap(Arg0, Arg1);
    }

    // Handle mul by one:
    if (Constant *CV1 = dyn_cast<Constant>(Arg1))
      if (ConstantInt *Splat =
            dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
        if (Splat->isOne())
          return CastInst::CreateIntegerCast(Arg0, II->getType(),
                                             /*isSigned=*/!Zext);

    break;
  }

  case Intrinsic::AMDGPU_rcp: {
    if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
      const APFloat &ArgVal = C->getValueAPF();
      APFloat Val(ArgVal.getSemantics(), 1.0);
      APFloat::opStatus Status = Val.divide(ArgVal,
                                            APFloat::rmNearestTiesToEven);
      // Only do this if it was exact and therefore not dependent on the
      // rounding mode.
      if (Status == APFloat::opOK)
        return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
    }

    break;
  }
  case Intrinsic::stackrestore: {
    // If the save is right next to the restore, remove the restore.  This can
    // happen when variable allocas are DCE'd.
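    // In that case the stack pointer was never adjusted between the stacksave
    // and the stackrestore, so the restore has no effect.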
    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
      if (SS->getIntrinsicID() == Intrinsic::stacksave) {
        BasicBlock::iterator BI = SS;
        if (&*++BI == II)
          return EraseInstFromFunction(CI);
      }
    }

    // Scan down this block to see if there is another stack restore in the
    // same block without an intervening call/alloca.
    BasicBlock::iterator BI = II;
    TerminatorInst *TI = II->getParent()->getTerminator();
    bool CannotRemove = false;
    for (++BI; &*BI != TI; ++BI) {
      if (isa<AllocaInst>(BI)) {
        CannotRemove = true;
        break;
      }
      if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
          // If there is a stackrestore below this one, remove this one.
          if (II->getIntrinsicID() == Intrinsic::stackrestore)
            return EraseInstFromFunction(CI);
          // Otherwise, ignore the intrinsic.
        } else {
          // If we found a non-intrinsic call, we can't remove the stack
          // restore.
          CannotRemove = true;
          break;
        }
      }
    }

    // If the stack restore is in a return, resume, or unwind block and if there
    // are no allocas or calls between the restore and the return, nuke the
    // restore.
    if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
      return EraseInstFromFunction(CI);
    break;
  }
  }

  return visitCallSite(II);
}

// InvokeInst simplification
//
Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
  return visitCallSite(&II);
}

/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
                                         const CastInst * const CI,
                                         const DataLayout * const DL,
                                         const int ix) {
  if (!CI->isLosslessCast())
    return false;

  // The size of ByVal or InAlloca arguments is derived from the type, so we
  // can't change to a type with a different size.  If the size were
  // passed explicitly we could avoid this check.
  if (!CS.isByValOrInAllocaArgument(ix))
    return true;

  Type* SrcTy =
    cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
  if (!SrcTy->isSized() || !DstTy->isSized())
    return false;
  if (!DL || DL->getTypeAllocSize(SrcTy) != DL->getTypeAllocSize(DstTy))
    return false;
  return true;
}

// Try to fold some different types of calls here.
// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
  if (!CI->getCalledFunction()) return nullptr;

  if (Value *With = Simplifier->optimizeCall(CI)) {
    ++NumSimplified;
    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
  }

  return nullptr;
}

static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
  // Strip off at most one level of pointer casts, looking for an alloca.  This
  // is good enough in practice and simpler than handling any number of casts.
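  // That is, we accept either the alloca itself or a single cast of it as
  // TrampMem; anything deeper is rejected below.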
  Value *Underlying = TrampMem->stripPointerCasts();
  if (Underlying != TrampMem &&
      (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
    return nullptr;
  if (!isa<AllocaInst>(Underlying))
    return nullptr;

  IntrinsicInst *InitTrampoline = nullptr;
  for (User *U : TrampMem->users()) {
    IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
    if (!II)
      return nullptr;
    if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
      if (InitTrampoline)
        // More than one init_trampoline writes to this value.  Give up.
        return nullptr;
      InitTrampoline = II;
      continue;
    }
    if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
      // Allow any number of calls to adjust.trampoline.
      continue;
    return nullptr;
  }

  // No call to init.trampoline found.
  if (!InitTrampoline)
    return nullptr;

  // Check that the alloca is being used in the expected way.
  if (InitTrampoline->getOperand(0) != TrampMem)
    return nullptr;

  return InitTrampoline;
}

static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
                                               Value *TrampMem) {
  // Visit all the previous instructions in the basic block, and try to find an
  // init.trampoline which has a direct path to the adjust.trampoline.
  for (BasicBlock::iterator I = AdjustTramp,
       E = AdjustTramp->getParent()->begin(); I != E; ) {
    Instruction *Inst = --I;
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
          II->getOperand(0) == TrampMem)
        return II;
    if (Inst->mayWriteToMemory())
      return nullptr;
  }
  return nullptr;
}

// Given a call to llvm.adjust.trampoline, find and return the corresponding
// call to llvm.init.trampoline if the call to the trampoline can be optimized
// to a direct call to a function.  Otherwise return NULL.
//
static IntrinsicInst *FindInitTrampoline(Value *Callee) {
  Callee = Callee->stripPointerCasts();
  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
  if (!AdjustTramp ||
      AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
    return nullptr;

  Value *TrampMem = AdjustTramp->getOperand(0);

  if (IntrinsicInst *IT = FindInitTrampolineFromAlloca(TrampMem))
    return IT;
  if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
    return IT;
  return nullptr;
}

// visitCallSite - Improvements for call and invoke instructions.
//
Instruction *InstCombiner::visitCallSite(CallSite CS) {
  if (isAllocLikeFn(CS.getInstruction(), TLI))
    return visitAllocSite(*CS.getInstruction());

  bool Changed = false;

  // If the callee is a pointer to a function, attempt to move any casts to the
  // arguments of the call/invoke.
  Value *Callee = CS.getCalledValue();
  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
    return nullptr;

  if (Function *CalleeF = dyn_cast<Function>(Callee))
    // If the call and callee calling conventions don't match, this call must
    // be unreachable, as the call is undefined.
    if (CalleeF->getCallingConv() != CS.getCallingConv() &&
        // Only do this for calls to a function with a body.  A prototype may
        // not actually end up matching the implementation's calling conv for a
        // variety of reasons (e.g. it may be written in assembly).
        !CalleeF->isDeclaration()) {
      Instruction *OldCall = CS.getInstruction();
      new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                    UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                    OldCall);
      // If OldCall does not return void then replaceAllUsesWith undef.
      // This allows ValueHandlers and custom metadata to adjust themselves.
      if (!OldCall->getType()->isVoidTy())
        ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
      if (isa<CallInst>(OldCall))
        return EraseInstFromFunction(*OldCall);

      // We cannot remove an invoke, because it would change the CFG, just
      // change the callee to a null pointer.
      cast<InvokeInst>(OldCall)->setCalledFunction(
                                    Constant::getNullValue(CalleeF->getType()));
      return nullptr;
    }

  if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
    // If CS does not return void then replaceAllUsesWith undef.
    // This allows ValueHandlers and custom metadata to adjust themselves.
    if (!CS.getInstruction()->getType()->isVoidTy())
      ReplaceInstUsesWith(*CS.getInstruction(),
                          UndefValue::get(CS.getInstruction()->getType()));

    if (isa<InvokeInst>(CS.getInstruction())) {
      // Can't remove an invoke because we cannot change the CFG.
      return nullptr;
    }

    // This instruction is not reachable, just remove it.  We insert a store to
    // undef so that we know that this code is not reachable, despite the fact
    // that we can't modify the CFG here.
    new StoreInst(ConstantInt::getTrue(Callee->getContext()),
                  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                  CS.getInstruction());

    return EraseInstFromFunction(*CS.getInstruction());
  }

  if (IntrinsicInst *II = FindInitTrampoline(Callee))
    return transformCallThroughTrampoline(CS, II);

  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  if (FTy->isVarArg()) {
    int ix = FTy->getNumParams();
    // See if we can optimize any arguments passed through the varargs area of
    // the call.
    for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
           E = CS.arg_end(); I != E; ++I, ++ix) {
      CastInst *CI = dyn_cast<CastInst>(*I);
      if (CI && isSafeToEliminateVarargsCast(CS, CI, DL, ix)) {
        *I = CI->getOperand(0);
        Changed = true;
      }
    }
  }

  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
    // Inline asm calls cannot throw - mark them 'nounwind'.
    CS.setDoesNotThrow();
    Changed = true;
  }

  // Try to optimize the call if possible, we require DataLayout for most of
  // this.  None of these calls are seen as possibly dead so go ahead and
  // delete the instruction now.
  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
    Instruction *I = tryOptimizeCall(CI, DL);
    // If we changed something, return the result.  Otherwise let the
    // fallthrough checks below run.
    if (I) return EraseInstFromFunction(*I);
  }

  return Changed ? CS.getInstruction() : nullptr;
}

// transformConstExprCastCall - If the callee is a constexpr cast of a function,
// attempt to move the cast to the arguments of the call/invoke.
//
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
  Function *Callee =
    dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
  if (!Callee)
    return false;
  Instruction *Caller = CS.getInstruction();
  const AttributeSet &CallerPAL = CS.getAttributes();

  // Okay, this is a cast from a function to a different type.  Unless doing so
  // would cause a type conversion of one of our arguments, change this call to
  // be a direct call with arguments casted to the appropriate types.
  //
  FunctionType *FT = Callee->getFunctionType();
  Type *OldRetTy = Caller->getType();
  Type *NewRetTy = FT->getReturnType();

  // Check to see if we are changing the return type...
  if (OldRetTy != NewRetTy) {

    if (NewRetTy->isStructTy())
      return false; // TODO: Handle multiple return values.

    if (!CastInst::isBitCastable(NewRetTy, OldRetTy)) {
      if (Callee->isDeclaration())
        return false;   // Cannot transform this return value.

      if (!Caller->use_empty() &&
          // void -> non-void is handled specially
          !NewRetTy->isVoidTy())
        return false;   // Cannot transform this return value.
    }

    if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
      AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);
      if (RAttrs.
          hasAttributes(AttributeFuncs::
                        typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                        AttributeSet::ReturnIndex))
        return false;   // Attribute not compatible with transformed value.
    }

    // If the callsite is an invoke instruction, and the return value is used by
    // a PHI node in a successor, we cannot change the return type of the call
    // because there is no place to put the cast instruction (without breaking
    // the critical edge).  Bail out in this case.
    if (!Caller->use_empty())
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
        for (User *U : II->users())
          if (PHINode *PN = dyn_cast<PHINode>(U))
            if (PN->getParent() == II->getNormalDest() ||
                PN->getParent() == II->getUnwindDest())
              return false;
  }

  unsigned NumActualArgs = CS.arg_size();
  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);

  CallSite::arg_iterator AI = CS.arg_begin();
  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);
    Type *ActTy = (*AI)->getType();

    if (!CastInst::isBitCastable(ActTy, ParamTy))
      return false;   // Cannot transform this parameter value.

    if (AttrBuilder(CallerPAL.getParamAttributes(i + 1), i + 1).
          hasAttributes(AttributeFuncs::
                        typeIncompatible(ParamTy, i + 1), i + 1))
      return false;   // Attribute not compatible with transformed value.

    if (CS.isInAllocaArgument(i))
      return false;   // Cannot transform to and from inalloca.

    // If the parameter is passed as a byval argument, then we have to have a
    // sized type and the sized type has to have the same size as the old type.
    if (ParamTy != ActTy &&
        CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
                                                         Attribute::ByVal)) {
      PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
      if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
        return false;

      Type *CurElTy = ActTy->getPointerElementType();
      if (DL->getTypeAllocSize(CurElTy) !=
          DL->getTypeAllocSize(ParamPTy->getElementType()))
        return false;
    }
  }

  if (Callee->isDeclaration()) {
    // Do not delete arguments unless we have a function body.
    if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
      return false;

    // If the callee is just a declaration, don't change the varargsness of the
    // call.  We don't want to introduce a varargs call where one doesn't
    // already exist.
    PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
      return false;

    // If both the callee and the cast type are varargs, we still have to make
    // sure the number of fixed parameters is the same or we have the same
    // ABI issues as if we introduce a varargs call.
    if (FT->isVarArg() &&
        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
        FT->getNumParams() !=
        cast<FunctionType>(APTy->getElementType())->getNumParams())
      return false;
  }

  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
      !CallerPAL.isEmpty())
    // In this case we have more arguments than the new function type, but we
    // won't be dropping them.  Check that these extra arguments have attributes
    // that are compatible with being a vararg call argument.
    for (unsigned i = CallerPAL.getNumSlots(); i; --i) {
      unsigned Index = CallerPAL.getSlotIndex(i - 1);
      if (Index <= FT->getNumParams())
        break;

      // Check if it has an attribute that's incompatible with varargs.
      AttributeSet PAttrs = CallerPAL.getSlotAttributes(i - 1);
      if (PAttrs.hasAttribute(Index, Attribute::StructRet))
        return false;
    }


  // Okay, we decided that this is a safe thing to do: go ahead and start
  // inserting cast instructions as necessary.
  std::vector<Value*> Args;
  Args.reserve(NumActualArgs);
  SmallVector<AttributeSet, 8> attrVec;
  attrVec.reserve(NumCommonArgs);

  // Get any return attributes.
  AttrBuilder RAttrs(CallerPAL, AttributeSet::ReturnIndex);

  // If the return value is not being used, the type may not be compatible
  // with the existing attributes.  Wipe out any problematic attributes.
  RAttrs.
    removeAttributes(AttributeFuncs::
                     typeIncompatible(NewRetTy, AttributeSet::ReturnIndex),
                     AttributeSet::ReturnIndex);

  // Add the new return attributes.
  if (RAttrs.hasAttributes())
    attrVec.push_back(AttributeSet::get(Caller->getContext(),
                                        AttributeSet::ReturnIndex, RAttrs));

  AI = CS.arg_begin();
  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
    Type *ParamTy = FT->getParamType(i);

    if ((*AI)->getType() == ParamTy) {
      Args.push_back(*AI);
    } else {
      Args.push_back(Builder->CreateBitCast(*AI, ParamTy));
    }

    // Add any parameter attributes.
    AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
    if (PAttrs.hasAttributes())
      attrVec.push_back(AttributeSet::get(Caller->getContext(), i + 1,
                                          PAttrs));
  }

  // If the function takes more arguments than the call was taking, add them
  // now.
  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i)
    Args.push_back(Constant::getNullValue(FT->getParamType(i)));

  // If we are removing arguments to the function, emit an obnoxious warning.
  if (FT->getNumParams() < NumActualArgs) {
    // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
    if (FT->isVarArg()) {
      // Add all of the arguments in their promoted form to the arg list.
      for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
        Type *PTy = getPromotedType((*AI)->getType());
        if (PTy != (*AI)->getType()) {
          // Must promote to pass through va_arg area!
          Instruction::CastOps opcode =
            CastInst::getCastOpcode(*AI, false, PTy, false);
          Args.push_back(Builder->CreateCast(opcode, *AI, PTy));
        } else {
          Args.push_back(*AI);
        }

        // Add any parameter attributes.
        AttrBuilder PAttrs(CallerPAL.getParamAttributes(i + 1), i + 1);
        if (PAttrs.hasAttributes())
          attrVec.push_back(AttributeSet::get(FT->getContext(), i + 1,
                                              PAttrs));
      }
    }
  }

  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
  if (CallerPAL.hasAttributes(AttributeSet::FunctionIndex))
    attrVec.push_back(AttributeSet::get(Callee->getContext(), FnAttrs));

  if (NewRetTy->isVoidTy())
    Caller->setName("");   // Void type should not have a name.

  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                       attrVec);

  Instruction *NC;
  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
    NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
                               II->getUnwindDest(), Args);
    NC->takeName(II);
    cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
    cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
  } else {
    CallInst *CI = cast<CallInst>(Caller);
    NC = Builder->CreateCall(Callee, Args);
    NC->takeName(CI);
    if (CI->isTailCall())
      cast<CallInst>(NC)->setTailCall();
    cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
    cast<CallInst>(NC)->setAttributes(NewCallerPAL);
  }

  // Insert a cast of the return type as necessary.
  Value *NV = NC;
  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
    if (!NV->getType()->isVoidTy()) {
      NV = NC = CastInst::Create(CastInst::BitCast, NC, OldRetTy);
      NC->setDebugLoc(Caller->getDebugLoc());

      // If this is an invoke instruction, we should insert it after the first
      // non-phi instruction in the normal successor block.
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
        InsertNewInstBefore(NC, *I);
      } else {
        // Otherwise, it's a call, just insert cast right after the call.
        InsertNewInstBefore(NC, *Caller);
      }
      Worklist.AddUsersToWorkList(*Caller);
    } else {
      NV = UndefValue::get(Caller->getType());
    }
  }

  if (!Caller->use_empty())
    ReplaceInstUsesWith(*Caller, NV);
  else if (Caller->hasValueHandle())
    ValueHandleBase::ValueIsRAUWd(Caller, NV);

  EraseInstFromFunction(*Caller);
  return true;
}

// transformCallThroughTrampoline - Turn a call to a function created by
// init_trampoline / adjust_trampoline intrinsic pair into a direct call to the
// underlying function.
//
Instruction *
InstCombiner::transformCallThroughTrampoline(CallSite CS,
                                             IntrinsicInst *Tramp) {
  Value *Callee = CS.getCalledValue();
  PointerType *PTy = cast<PointerType>(Callee->getType());
  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
  const AttributeSet &Attrs = CS.getAttributes();

  // If the call already has the 'nest' attribute somewhere then give up -
  // otherwise 'nest' would occur twice after splicing in the chain.
  if (Attrs.hasAttrSomewhere(Attribute::Nest))
    return nullptr;

  assert(Tramp &&
         "transformCallThroughTrampoline called with incorrect CallSite.");

  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
  PointerType *NestFPTy = cast<PointerType>(NestF->getType());
  FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());

  const AttributeSet &NestAttrs = NestF->getAttributes();
  if (!NestAttrs.isEmpty()) {
    unsigned NestIdx = 1;
    Type *NestTy = nullptr;
    AttributeSet NestAttr;

    // Look for a parameter marked with the 'nest' attribute.
    for (FunctionType::param_iterator I = NestFTy->param_begin(),
         E = NestFTy->param_end(); I != E; ++NestIdx, ++I)
      if (NestAttrs.hasAttribute(NestIdx, Attribute::Nest)) {
        // Record the parameter type and any other attributes.
        NestTy = *I;
        NestAttr = NestAttrs.getParamAttributes(NestIdx);
        break;
      }

    if (NestTy) {
      Instruction *Caller = CS.getInstruction();
      std::vector<Value*> NewArgs;
      NewArgs.reserve(CS.arg_size() + 1);

      SmallVector<AttributeSet, 8> NewAttrs;
      NewAttrs.reserve(Attrs.getNumSlots() + 1);

      // Insert the nest argument into the call argument list, which may
      // mean appending it.  Likewise for attributes.

      // Add any result attributes.
      if (Attrs.hasAttributes(AttributeSet::ReturnIndex))
        NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                             Attrs.getRetAttributes()));

      {
        unsigned Idx = 1;
        CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
        do {
          if (Idx == NestIdx) {
            // Add the chain argument and attributes.
            Value *NestVal = Tramp->getArgOperand(2);
            if (NestVal->getType() != NestTy)
              NestVal = Builder->CreateBitCast(NestVal, NestTy, "nest");
            NewArgs.push_back(NestVal);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 NestAttr));
          }

          if (I == E)
            break;

          // Add the original argument and attributes.
          NewArgs.push_back(*I);
          AttributeSet Attr = Attrs.getParamAttributes(Idx);
          if (Attr.hasAttributes(Idx)) {
            AttrBuilder B(Attr, Idx);
            NewAttrs.push_back(AttributeSet::get(Caller->getContext(),
                                                 Idx + (Idx >= NestIdx), B));
          }

          ++Idx, ++I;
        } while (1);
      }

      // Add any function attributes.
      if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
        NewAttrs.push_back(AttributeSet::get(FTy->getContext(),
                                             Attrs.getFnAttributes()));

      // The trampoline may have been bitcast to a bogus type (FTy).
      // Handle this by synthesizing a new function type, equal to FTy
      // with the chain parameter inserted.

      std::vector<Type*> NewTypes;
      NewTypes.reserve(FTy->getNumParams()+1);

      // Insert the chain's type into the list of parameter types, which may
      // mean appending it.
      {
        unsigned Idx = 1;
        FunctionType::param_iterator I = FTy->param_begin(),
          E = FTy->param_end();

        do {
          if (Idx == NestIdx)
            // Add the chain's type.
            NewTypes.push_back(NestTy);

          if (I == E)
            break;

          // Add the original type.
          NewTypes.push_back(*I);

          ++Idx, ++I;
        } while (1);
      }

      // Replace the trampoline call with a direct call.  Let the generic
      // code sort out any function type mismatches.
      FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
                                               FTy->isVarArg());
      Constant *NewCallee =
        NestF->getType() == PointerType::getUnqual(NewFTy) ?
        NestF : ConstantExpr::getBitCast(NestF,
                                         PointerType::getUnqual(NewFTy));
      const AttributeSet &NewPAL =
          AttributeSet::get(FTy->getContext(), NewAttrs);

      Instruction *NewCaller;
      if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
        NewCaller = InvokeInst::Create(NewCallee,
                                       II->getNormalDest(), II->getUnwindDest(),
                                       NewArgs);
        cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
        cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
      } else {
        NewCaller = CallInst::Create(NewCallee, NewArgs);
        if (cast<CallInst>(Caller)->isTailCall())
          cast<CallInst>(NewCaller)->setTailCall();
        cast<CallInst>(NewCaller)->
          setCallingConv(cast<CallInst>(Caller)->getCallingConv());
        cast<CallInst>(NewCaller)->setAttributes(NewPAL);
      }

      return NewCaller;
    }
  }

  // Replace the trampoline call with a direct call.  Since there is no 'nest'
  // parameter, there is no need to adjust the argument list.  Let the generic
  // code sort out any function type mismatches.
  Constant *NewCallee =
    NestF->getType() == PTy ? NestF :
                              ConstantExpr::getBitCast(NestF, PTy);
  CS.setCalledFunction(NewCallee);
  return CS.getInstruction();
}