//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
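  // ("__builtin_" is ten characters long, which is what the +10 below skips
  // over, e.g. "__builtin_fabsf" -> "fabsf".)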
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
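/// For example, __sync_fetch_and_add(p, v) is lowered to an "atomicrmw add"
/// on p, whose result is the value that was previously stored in *p.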
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
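/// For example, __sync_add_and_fetch(p, v) emits the same "atomicrmw add" as
/// above and then re-applies the addition to the old value, so the new
/// (post-operation) value is what gets returned.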
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
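/// Note: this emits a plain call to the libm function rather than using an
/// LLVM intrinsic for the operation.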
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: llvm_unreachable("Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
                                                   false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }

  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(Target.isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
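    // e.g. ffs(0x08) == 4 (bit 3, counted from 1), and ffs(0) == 0.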
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
                                                       Builder.getTrue()),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
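    // e.g. parity(0b1011) == 1, since an odd number of bits (three) are set.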
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));

    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only distinguishes object-size types 0 and 2, so pass bit 1 of
    // the type argument along as a boolean.
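    // That bit ((val & 0x2) >> 1 below) tells llvm.objectsize whether to
    // report the minimum (true) or the maximum (false) remaining size.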
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
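    // The final argument to llvm.prefetch selects a data (1) rather than an
    // instruction (0) prefetch.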
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined)
      EmitCheck(Builder.getFalse());
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity;
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
                         Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
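    // Word 0 holds the frame pointer and word 2 the stack pointer (stored
    // below); the remaining words are left for the backend's SjLj lowering
    // to fill in (e.g. the resume address).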
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
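    // Lower to a sequentially-consistent cmpxchg; its result is the value
    // previously in memory, which is what this builtin returns.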
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
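    // Same cmpxchg lowering as above, but this builtin returns whether the
    // exchange actually happened, i.e. whether old value == expected value.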
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                 llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
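    // __sync_lock_release writes 0 to *ptr, so lower it to an atomic store
    // of the null value with release ordering.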
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(0);
  }

  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
                                               FunctionType::ExtInfo(),
                                               RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }

  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
        cast<llvm::PointerType>(Ptr->getType())->getAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = 0;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Monotonic);
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Acquire);
        break;
      case 3:  // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Release);
        break;
      case 4:  // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::AcquireRelease);
        break;
      case 5:  // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }

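    // The ordering is not a compile-time constant, so switch over all valid
    // values at run time and emit the matching exchange in each arm.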
   1081     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
   1082 
   1083     llvm::BasicBlock *BBs[5] = {
   1084       createBasicBlock("monotonic", CurFn),
   1085       createBasicBlock("acquire", CurFn),
   1086       createBasicBlock("release", CurFn),
   1087       createBasicBlock("acqrel", CurFn),
   1088       createBasicBlock("seqcst", CurFn)
   1089     };
   1090     llvm::AtomicOrdering Orders[5] = {
   1091       llvm::Monotonic, llvm::Acquire, llvm::Release,
   1092       llvm::AcquireRelease, llvm::SequentiallyConsistent
   1093     };
   1094 
   1095     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
   1096     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
   1097 
   1098     Builder.SetInsertPoint(ContBB);
   1099     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
   1100 
   1101     for (unsigned i = 0; i < 5; ++i) {
   1102       Builder.SetInsertPoint(BBs[i]);
   1103       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
   1104                                                    Ptr, NewVal, Orders[i]);
   1105       RMW->setVolatile(Volatile);
   1106       Result->addIncoming(RMW, BBs[i]);
   1107       Builder.CreateBr(ContBB);
   1108     }
   1109 
   1110     SI->addCase(Builder.getInt32(0), BBs[0]);
   1111     SI->addCase(Builder.getInt32(1), BBs[1]);
   1112     SI->addCase(Builder.getInt32(2), BBs[1]);
   1113     SI->addCase(Builder.getInt32(3), BBs[2]);
   1114     SI->addCase(Builder.getInt32(4), BBs[3]);
   1115     SI->addCase(Builder.getInt32(5), BBs[4]);
   1116 
   1117     Builder.SetInsertPoint(ContBB);
   1118     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
   1119   }
   1120 
   1121   case Builtin::BI__atomic_clear: {
   1122     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
   1123     bool Volatile =
   1124         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
   1125 
   1126     Value *Ptr = EmitScalarExpr(E->getArg(0));
   1127     unsigned AddrSpace =
   1128         cast<llvm::PointerType>(Ptr->getType())->getAddressSpace();
   1129     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
   1130     Value *NewVal = Builder.getInt8(0);
   1131     Value *Order = EmitScalarExpr(E->getArg(1));
   1132     if (isa<llvm::ConstantInt>(Order)) {
   1133       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
   1134       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
   1135       Store->setAlignment(1);
   1136       switch (ord) {
   1137       case 0:  // memory_order_relaxed
   1138       default: // invalid order
   1139         Store->setOrdering(llvm::Monotonic);
   1140         break;
   1141       case 3:  // memory_order_release
   1142         Store->setOrdering(llvm::Release);
   1143         break;
   1144       case 5:  // memory_order_seq_cst
   1145         Store->setOrdering(llvm::SequentiallyConsistent);
   1146         break;
   1147       }
   1148       return RValue::get(0);
   1149     }
   1150 
   1151     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
   1152 
   1153     llvm::BasicBlock *BBs[3] = {
   1154       createBasicBlock("monotonic", CurFn),
   1155       createBasicBlock("release", CurFn),
   1156       createBasicBlock("seqcst", CurFn)
   1157     };
   1158     llvm::AtomicOrdering Orders[3] = {
   1159       llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
   1160     };
   1161 
   1162     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
   1163     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
   1164 
   1165     for (unsigned i = 0; i < 3; ++i) {
   1166       Builder.SetInsertPoint(BBs[i]);
   1167       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
   1168       Store->setAlignment(1);
   1169       Store->setOrdering(Orders[i]);
   1170       Builder.CreateBr(ContBB);
   1171     }
   1172 
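             // __atomic_clear only permits relaxed (0), release (3) and
             // seq_cst (5); any other value falls back to the monotonic block
             // through the switch default.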
   1173     SI->addCase(Builder.getInt32(0), BBs[0]);
   1174     SI->addCase(Builder.getInt32(3), BBs[1]);
   1175     SI->addCase(Builder.getInt32(5), BBs[2]);
   1176 
   1177     Builder.SetInsertPoint(ContBB);
   1178     return RValue::get(0);
   1179   }
   1180 
   1181   case Builtin::BI__atomic_thread_fence:
   1182   case Builtin::BI__atomic_signal_fence:
   1183   case Builtin::BI__c11_atomic_thread_fence:
   1184   case Builtin::BI__c11_atomic_signal_fence: {
   1185     llvm::SynchronizationScope Scope;
   1186     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
   1187         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
   1188       Scope = llvm::SingleThread;
   1189     else
   1190       Scope = llvm::CrossThread;
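             // For example, __c11_atomic_thread_fence(__ATOMIC_SEQ_CST) takes
             // the constant path below and lowers to a single seq_cst fence.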
   1191     Value *Order = EmitScalarExpr(E->getArg(0));
   1192     if (isa<llvm::ConstantInt>(Order)) {
   1193       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
   1194       switch (ord) {
   1195       case 0:  // memory_order_relaxed
   1196       default: // invalid order
   1197         break;
   1198       case 1:  // memory_order_consume
   1199       case 2:  // memory_order_acquire
   1200         Builder.CreateFence(llvm::Acquire, Scope);
   1201         break;
   1202       case 3:  // memory_order_release
   1203         Builder.CreateFence(llvm::Release, Scope);
   1204         break;
   1205       case 4:  // memory_order_acq_rel
   1206         Builder.CreateFence(llvm::AcquireRelease, Scope);
   1207         break;
   1208       case 5:  // memory_order_seq_cst
   1209         Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
   1210         break;
   1211       }
   1212       return RValue::get(0);
   1213     }
   1214 
   1215     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
   1216     AcquireBB = createBasicBlock("acquire", CurFn);
   1217     ReleaseBB = createBasicBlock("release", CurFn);
   1218     AcqRelBB = createBasicBlock("acqrel", CurFn);
   1219     SeqCstBB = createBasicBlock("seqcst", CurFn);
   1220     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
   1221 
   1222     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
   1223     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
   1224 
   1225     Builder.SetInsertPoint(AcquireBB);
   1226     Builder.CreateFence(llvm::Acquire, Scope);
   1227     Builder.CreateBr(ContBB);
   1228     SI->addCase(Builder.getInt32(1), AcquireBB);
   1229     SI->addCase(Builder.getInt32(2), AcquireBB);
   1230 
   1231     Builder.SetInsertPoint(ReleaseBB);
   1232     Builder.CreateFence(llvm::Release, Scope);
   1233     Builder.CreateBr(ContBB);
   1234     SI->addCase(Builder.getInt32(3), ReleaseBB);
   1235 
   1236     Builder.SetInsertPoint(AcqRelBB);
   1237     Builder.CreateFence(llvm::AcquireRelease, Scope);
   1238     Builder.CreateBr(ContBB);
   1239     SI->addCase(Builder.getInt32(4), AcqRelBB);
   1240 
   1241     Builder.SetInsertPoint(SeqCstBB);
   1242     Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
   1243     Builder.CreateBr(ContBB);
   1244     SI->addCase(Builder.getInt32(5), SeqCstBB);
   1245 
   1246     Builder.SetInsertPoint(ContBB);
   1247     return RValue::get(0);
   1248   }
   1249 
   1250     // Library functions with special handling.
   1251   case Builtin::BIsqrt:
   1252   case Builtin::BIsqrtf:
   1253   case Builtin::BIsqrtl: {
   1254     // TODO: there is currently no set of optimizer flags
   1255     // sufficient for us to rewrite sqrt to @llvm.sqrt.
   1256     // -fmath-errno=0 is not good enough; we need finiteness.
    1257     // We could probably precondition the call with an ult comparison
    1258     // against 0, but is that worth the complexity?
   1259     break;
   1260   }
   1261 
   1262   case Builtin::BIpow:
   1263   case Builtin::BIpowf:
   1264   case Builtin::BIpowl: {
    1265     // Rewrite pow to intrinsic if allowed.
   1266     if (!FD->hasAttr<ConstAttr>())
   1267       break;
   1268     Value *Base = EmitScalarExpr(E->getArg(0));
   1269     Value *Exponent = EmitScalarExpr(E->getArg(1));
   1270     llvm::Type *ArgType = Base->getType();
   1271     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
   1272     return RValue::get(Builder.CreateCall2(F, Base, Exponent));
   1273   }
   1274 
   1275   case Builtin::BIfma:
   1276   case Builtin::BIfmaf:
   1277   case Builtin::BIfmal:
   1278   case Builtin::BI__builtin_fma:
   1279   case Builtin::BI__builtin_fmaf:
   1280   case Builtin::BI__builtin_fmal: {
   1281     // Rewrite fma to intrinsic.
   1282     Value *FirstArg = EmitScalarExpr(E->getArg(0));
   1283     llvm::Type *ArgType = FirstArg->getType();
   1284     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
   1285     return RValue::get(Builder.CreateCall3(F, FirstArg,
   1286                                               EmitScalarExpr(E->getArg(1)),
   1287                                               EmitScalarExpr(E->getArg(2))));
   1288   }
   1289 
   1290   case Builtin::BI__builtin_signbit:
   1291   case Builtin::BI__builtin_signbitf:
   1292   case Builtin::BI__builtin_signbitl: {
   1293     LLVMContext &C = CGM.getLLVMContext();
   1294 
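             // Lower signbit by bitcasting the argument to an integer of the
             // same width and testing the sign bit with a signed compare
             // against zero.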
   1295     Value *Arg = EmitScalarExpr(E->getArg(0));
   1296     llvm::Type *ArgTy = Arg->getType();
   1297     if (ArgTy->isPPC_FP128Ty())
   1298       break; // FIXME: I'm not sure what the right implementation is here.
   1299     int ArgWidth = ArgTy->getPrimitiveSizeInBits();
   1300     llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
   1301     Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
   1302     Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
   1303     Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
   1304     return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
   1305   }
   1306   case Builtin::BI__builtin_annotation: {
   1307     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
   1308     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
   1309                                       AnnVal->getType());
   1310 
    1311     // Get the annotation string, looking through casts. Sema requires this to
    1312     // be a non-wide string literal, potentially cast, so the cast<> is safe.
   1313     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
   1314     llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
   1315     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
   1316   }
   1317   }
   1318 
   1319   // If this is an alias for a lib function (e.g. __builtin_sin), emit
   1320   // the call using the normal call path, but using the unmangled
   1321   // version of the function name.
   1322   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
   1323     return emitLibraryCall(*this, FD, E,
   1324                            CGM.getBuiltinLibFunction(FD, BuiltinID));
   1325 
   1326   // If this is a predefined lib function (e.g. malloc), emit the call
   1327   // using exactly the normal call path.
   1328   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
   1329     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
   1330 
   1331   // See if we have a target specific intrinsic.
   1332   const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
   1333   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
   1334   if (const char *Prefix =
   1335       llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
   1336     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
   1337 
   1338   if (IntrinsicID != Intrinsic::not_intrinsic) {
   1339     SmallVector<Value*, 16> Args;
   1340 
   1341     // Find out if any arguments are required to be integer constant
   1342     // expressions.
   1343     unsigned ICEArguments = 0;
   1344     ASTContext::GetBuiltinTypeError Error;
   1345     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   1346     assert(Error == ASTContext::GE_None && "Should not codegen an error");
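             // ICEArguments is a bitmask: bit i is set iff argument i of the
             // builtin must be an integer constant expression.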
   1347 
   1348     Function *F = CGM.getIntrinsic(IntrinsicID);
   1349     llvm::FunctionType *FTy = F->getFunctionType();
   1350 
   1351     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
   1352       Value *ArgValue;
   1353       // If this is a normal argument, just emit it as a scalar.
   1354       if ((ICEArguments & (1 << i)) == 0) {
   1355         ArgValue = EmitScalarExpr(E->getArg(i));
   1356       } else {
   1357         // If this is required to be a constant, constant fold it so that we
   1358         // know that the generated intrinsic gets a ConstantInt.
   1359         llvm::APSInt Result;
   1360         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
   1361         assert(IsConst && "Constant arg isn't actually constant?");
   1362         (void)IsConst;
   1363         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
   1364       }
   1365 
   1366       // If the intrinsic arg type is different from the builtin arg type
   1367       // we need to do a bit cast.
   1368       llvm::Type *PTy = FTy->getParamType(i);
   1369       if (PTy != ArgValue->getType()) {
    1370         assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
   1371                "Must be able to losslessly bit cast to param");
   1372         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
   1373       }
   1374 
   1375       Args.push_back(ArgValue);
   1376     }
   1377 
   1378     Value *V = Builder.CreateCall(F, Args);
   1379     QualType BuiltinRetType = E->getType();
   1380 
   1381     llvm::Type *RetTy = VoidTy;
   1382     if (!BuiltinRetType->isVoidType())
   1383       RetTy = ConvertType(BuiltinRetType);
   1384 
   1385     if (RetTy != V->getType()) {
   1386       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
   1387              "Must be able to losslessly bit cast result type");
   1388       V = Builder.CreateBitCast(V, RetTy);
   1389     }
   1390 
   1391     return RValue::get(V);
   1392   }
   1393 
   1394   // See if we have a target specific builtin that needs to be lowered.
   1395   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
   1396     return RValue::get(V);
   1397 
   1398   ErrorUnsupported(E, "builtin function");
   1399 
    1400   // Unknown builtin: a diagnostic has already been emitted, so return undef.
   1401   if (hasAggregateLLVMType(E->getType()))
   1402     return RValue::getAggregate(CreateMemTemp(E->getType()));
   1403   return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
   1404 }
   1405 
   1406 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
   1407                                               const CallExpr *E) {
   1408   switch (Target.getTriple().getArch()) {
   1409   case llvm::Triple::arm:
   1410   case llvm::Triple::thumb:
   1411     return EmitARMBuiltinExpr(BuiltinID, E);
   1412   case llvm::Triple::x86:
   1413   case llvm::Triple::x86_64:
   1414     return EmitX86BuiltinExpr(BuiltinID, E);
   1415   case llvm::Triple::ppc:
   1416   case llvm::Triple::ppc64:
   1417     return EmitPPCBuiltinExpr(BuiltinID, E);
   1418   default:
   1419     return 0;
   1420   }
   1421 }
   1422 
   1423 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
   1424                                      NeonTypeFlags TypeFlags) {
   1425   int IsQuad = TypeFlags.isQuad();
   1426   switch (TypeFlags.getEltType()) {
   1427   case NeonTypeFlags::Int8:
   1428   case NeonTypeFlags::Poly8:
   1429     return llvm::VectorType::get(CGF->Int8Ty, 8 << IsQuad);
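           // Note: Float16 vectors are represented as <N x i16> here, since
           // __fp16 is a storage-only type at this point.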
   1430   case NeonTypeFlags::Int16:
   1431   case NeonTypeFlags::Poly16:
   1432   case NeonTypeFlags::Float16:
   1433     return llvm::VectorType::get(CGF->Int16Ty, 4 << IsQuad);
   1434   case NeonTypeFlags::Int32:
   1435     return llvm::VectorType::get(CGF->Int32Ty, 2 << IsQuad);
   1436   case NeonTypeFlags::Int64:
   1437     return llvm::VectorType::get(CGF->Int64Ty, 1 << IsQuad);
   1438   case NeonTypeFlags::Float32:
   1439     return llvm::VectorType::get(CGF->FloatTy, 2 << IsQuad);
   1440   }
   1441   llvm_unreachable("Invalid NeonTypeFlags element type!");
   1442 }
   1443 
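         /// EmitNeonSplat - Splat the lane selected by the constant C across
         /// all elements of V using a shufflevector with a constant mask.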
   1444 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
   1445   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
   1446   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
   1447   return Builder.CreateShuffleVector(V, V, SV, "lane");
   1448 }
   1449 
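         /// EmitNeonCall - Emit a call to F, bitcasting each operand to the
         /// corresponding parameter type.  Operand 'shift' (if nonzero) is
         /// instead expanded into a splat vector of the shift amount, negated
         /// when 'rightshift' is set.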
   1450 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
   1451                                      const char *name,
   1452                                      unsigned shift, bool rightshift) {
   1453   unsigned j = 0;
   1454   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
   1455        ai != ae; ++ai, ++j)
   1456     if (shift > 0 && shift == j)
   1457       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
   1458     else
   1459       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
   1460 
   1461   return Builder.CreateCall(F, Ops, name);
   1462 }
   1463 
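         /// EmitNeonShiftVector - Build a constant splat vector of the shift
         /// amount V, negating it when 'neg' is set (i.e. for right shifts).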
   1464 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
   1465                                             bool neg) {
   1466   int SV = cast<ConstantInt>(V)->getSExtValue();
   1467 
   1468   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
   1469   llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
   1470   return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
   1471 }
   1472 
    1473 /// EmitPointerWithAlignment - Given an expression with a pointer type, emit
    1474 /// the pointer value and compute the alignment of the type it points to.
    1475 /// Skips over implicit casts.
   1476 std::pair<llvm::Value*, unsigned>
   1477 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
   1478   assert(Addr->getType()->isPointerType());
   1479   Addr = Addr->IgnoreParens();
   1480   if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
   1481     if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
   1482         ICE->getSubExpr()->getType()->isPointerType()) {
   1483       std::pair<llvm::Value*, unsigned> Ptr =
   1484           EmitPointerWithAlignment(ICE->getSubExpr());
   1485       Ptr.first = Builder.CreateBitCast(Ptr.first,
   1486                                         ConvertType(Addr->getType()));
   1487       return Ptr;
   1488     } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
   1489       LValue LV = EmitLValue(ICE->getSubExpr());
   1490       unsigned Align = LV.getAlignment().getQuantity();
   1491       if (!Align) {
   1492         // FIXME: Once LValues are fixed to always set alignment,
   1493         // zap this code.
   1494         QualType PtTy = ICE->getSubExpr()->getType();
   1495         if (!PtTy->isIncompleteType())
   1496           Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
   1497         else
   1498           Align = 1;
   1499       }
   1500       return std::make_pair(LV.getAddress(), Align);
   1501     }
   1502   }
   1503   if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
   1504     if (UO->getOpcode() == UO_AddrOf) {
   1505       LValue LV = EmitLValue(UO->getSubExpr());
   1506       unsigned Align = LV.getAlignment().getQuantity();
   1507       if (!Align) {
   1508         // FIXME: Once LValues are fixed to always set alignment,
   1509         // zap this code.
   1510         QualType PtTy = UO->getSubExpr()->getType();
   1511         if (!PtTy->isIncompleteType())
   1512           Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
   1513         else
   1514           Align = 1;
   1515       }
   1516       return std::make_pair(LV.getAddress(), Align);
   1517     }
   1518   }
   1519 
   1520   unsigned Align = 1;
   1521   QualType PtTy = Addr->getType()->getPointeeType();
   1522   if (!PtTy->isIncompleteType())
   1523     Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
   1524 
   1525   return std::make_pair(EmitScalarExpr(Addr), Align);
   1526 }
   1527 
   1528 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   1529                                            const CallExpr *E) {
   1530   if (BuiltinID == ARM::BI__clear_cache) {
   1531     const FunctionDecl *FD = E->getDirectCallee();
    1532     // Oddly, people occasionally write this call without arguments and GCC
    1533     // accepts it; it is also marked as variadic in the description file.
   1534     SmallVector<Value*, 2> Ops;
   1535     for (unsigned i = 0; i < E->getNumArgs(); i++)
   1536       Ops.push_back(EmitScalarExpr(E->getArg(i)));
   1537     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
   1538     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
   1539     StringRef Name = FD->getName();
   1540     return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
   1541   }
   1542 
   1543   if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
   1544     Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
   1545 
   1546     Value *LdPtr = EmitScalarExpr(E->getArg(0));
   1547     Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
   1548 
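             // The intrinsic returns the doubleword as two i32 struct fields;
             // zero-extend both halves and combine them into an i64, with
             // field 1 forming the high word.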
   1549     Value *Val0 = Builder.CreateExtractValue(Val, 1);
   1550     Value *Val1 = Builder.CreateExtractValue(Val, 0);
   1551     Val0 = Builder.CreateZExt(Val0, Int64Ty);
   1552     Val1 = Builder.CreateZExt(Val1, Int64Ty);
   1553 
   1554     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
   1555     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
   1556     return Builder.CreateOr(Val, Val1);
   1557   }
   1558 
   1559   if (BuiltinID == ARM::BI__builtin_arm_strexd) {
   1560     Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
   1561     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
   1562 
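             // Split the i64 value into two i32 halves by spilling it to a
             // temporary and reloading it as a {i32, i32} struct.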
   1563     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
   1564     Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
   1565     Value *Val = EmitScalarExpr(E->getArg(0));
   1566     Builder.CreateStore(Val, Tmp);
   1567 
   1568     Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
   1569     Val = Builder.CreateLoad(LdPtr);
   1570 
   1571     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
   1572     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
   1573     Value *StPtr = EmitScalarExpr(E->getArg(1));
   1574     return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
   1575   }
   1576 
   1577   SmallVector<Value*, 4> Ops;
   1578   llvm::Value *Align = 0;
   1579   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
   1580     if (i == 0) {
   1581       switch (BuiltinID) {
   1582       case ARM::BI__builtin_neon_vld1_v:
   1583       case ARM::BI__builtin_neon_vld1q_v:
   1584       case ARM::BI__builtin_neon_vld1q_lane_v:
   1585       case ARM::BI__builtin_neon_vld1_lane_v:
   1586       case ARM::BI__builtin_neon_vld1_dup_v:
   1587       case ARM::BI__builtin_neon_vld1q_dup_v:
   1588       case ARM::BI__builtin_neon_vst1_v:
   1589       case ARM::BI__builtin_neon_vst1q_v:
   1590       case ARM::BI__builtin_neon_vst1q_lane_v:
   1591       case ARM::BI__builtin_neon_vst1_lane_v:
   1592       case ARM::BI__builtin_neon_vst2_v:
   1593       case ARM::BI__builtin_neon_vst2q_v:
   1594       case ARM::BI__builtin_neon_vst2_lane_v:
   1595       case ARM::BI__builtin_neon_vst2q_lane_v:
   1596       case ARM::BI__builtin_neon_vst3_v:
   1597       case ARM::BI__builtin_neon_vst3q_v:
   1598       case ARM::BI__builtin_neon_vst3_lane_v:
   1599       case ARM::BI__builtin_neon_vst3q_lane_v:
   1600       case ARM::BI__builtin_neon_vst4_v:
   1601       case ARM::BI__builtin_neon_vst4q_v:
   1602       case ARM::BI__builtin_neon_vst4_lane_v:
   1603       case ARM::BI__builtin_neon_vst4q_lane_v:
   1604         // Get the alignment for the argument in addition to the value;
   1605         // we'll use it later.
   1606         std::pair<llvm::Value*, unsigned> Src =
   1607             EmitPointerWithAlignment(E->getArg(0));
   1608         Ops.push_back(Src.first);
   1609         Align = Builder.getInt32(Src.second);
   1610         continue;
   1611       }
   1612     }
   1613     if (i == 1) {
   1614       switch (BuiltinID) {
   1615       case ARM::BI__builtin_neon_vld2_v:
   1616       case ARM::BI__builtin_neon_vld2q_v:
   1617       case ARM::BI__builtin_neon_vld3_v:
   1618       case ARM::BI__builtin_neon_vld3q_v:
   1619       case ARM::BI__builtin_neon_vld4_v:
   1620       case ARM::BI__builtin_neon_vld4q_v:
   1621       case ARM::BI__builtin_neon_vld2_lane_v:
   1622       case ARM::BI__builtin_neon_vld2q_lane_v:
   1623       case ARM::BI__builtin_neon_vld3_lane_v:
   1624       case ARM::BI__builtin_neon_vld3q_lane_v:
   1625       case ARM::BI__builtin_neon_vld4_lane_v:
   1626       case ARM::BI__builtin_neon_vld4q_lane_v:
   1627       case ARM::BI__builtin_neon_vld2_dup_v:
   1628       case ARM::BI__builtin_neon_vld3_dup_v:
   1629       case ARM::BI__builtin_neon_vld4_dup_v:
   1630         // Get the alignment for the argument in addition to the value;
   1631         // we'll use it later.
   1632         std::pair<llvm::Value*, unsigned> Src =
   1633             EmitPointerWithAlignment(E->getArg(1));
   1634         Ops.push_back(Src.first);
   1635         Align = Builder.getInt32(Src.second);
   1636         continue;
   1637       }
   1638     }
   1639     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   1640   }
   1641 
   1642   // vget_lane and vset_lane are not overloaded and do not have an extra
   1643   // argument that specifies the vector type.
   1644   switch (BuiltinID) {
   1645   default: break;
   1646   case ARM::BI__builtin_neon_vget_lane_i8:
   1647   case ARM::BI__builtin_neon_vget_lane_i16:
   1648   case ARM::BI__builtin_neon_vget_lane_i32:
   1649   case ARM::BI__builtin_neon_vget_lane_i64:
   1650   case ARM::BI__builtin_neon_vget_lane_f32:
   1651   case ARM::BI__builtin_neon_vgetq_lane_i8:
   1652   case ARM::BI__builtin_neon_vgetq_lane_i16:
   1653   case ARM::BI__builtin_neon_vgetq_lane_i32:
   1654   case ARM::BI__builtin_neon_vgetq_lane_i64:
   1655   case ARM::BI__builtin_neon_vgetq_lane_f32:
   1656     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
   1657                                         "vget_lane");
   1658   case ARM::BI__builtin_neon_vset_lane_i8:
   1659   case ARM::BI__builtin_neon_vset_lane_i16:
   1660   case ARM::BI__builtin_neon_vset_lane_i32:
   1661   case ARM::BI__builtin_neon_vset_lane_i64:
   1662   case ARM::BI__builtin_neon_vset_lane_f32:
   1663   case ARM::BI__builtin_neon_vsetq_lane_i8:
   1664   case ARM::BI__builtin_neon_vsetq_lane_i16:
   1665   case ARM::BI__builtin_neon_vsetq_lane_i32:
   1666   case ARM::BI__builtin_neon_vsetq_lane_i64:
   1667   case ARM::BI__builtin_neon_vsetq_lane_f32:
   1668     Ops.push_back(EmitScalarExpr(E->getArg(2)));
   1669     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
   1670   }
   1671 
   1672   // Get the last argument, which specifies the vector type.
   1673   llvm::APSInt Result;
   1674   const Expr *Arg = E->getArg(E->getNumArgs()-1);
   1675   if (!Arg->isIntegerConstantExpr(Result, getContext()))
   1676     return 0;
   1677 
   1678   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
   1679       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
   1680     // Determine the overloaded type of this builtin.
   1681     llvm::Type *Ty;
   1682     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
   1683       Ty = FloatTy;
   1684     else
   1685       Ty = DoubleTy;
   1686 
   1687     // Determine whether this is an unsigned conversion or not.
   1688     bool usgn = Result.getZExtValue() == 1;
   1689     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
   1690 
   1691     // Call the appropriate intrinsic.
   1692     Function *F = CGM.getIntrinsic(Int, Ty);
   1693     return Builder.CreateCall(F, Ops, "vcvtr");
   1694   }
   1695 
   1696   // Determine the type of this overloaded NEON intrinsic.
   1697   NeonTypeFlags Type(Result.getZExtValue());
   1698   bool usgn = Type.isUnsigned();
   1699   bool quad = Type.isQuad();
   1700   bool rightShift = false;
   1701 
   1702   llvm::VectorType *VTy = GetNeonType(this, Type);
   1703   llvm::Type *Ty = VTy;
   1704   if (!Ty)
   1705     return 0;
   1706 
   1707   unsigned Int;
   1708   switch (BuiltinID) {
   1709   default: return 0;
   1710   case ARM::BI__builtin_neon_vabd_v:
   1711   case ARM::BI__builtin_neon_vabdq_v:
   1712     Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
   1713     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
   1714   case ARM::BI__builtin_neon_vabs_v:
   1715   case ARM::BI__builtin_neon_vabsq_v:
   1716     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
   1717                         Ops, "vabs");
   1718   case ARM::BI__builtin_neon_vaddhn_v:
   1719     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
   1720                         Ops, "vaddhn");
   1721   case ARM::BI__builtin_neon_vcale_v:
   1722     std::swap(Ops[0], Ops[1]);
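             // fall through: the vcale/vcalt builtins are vcage/vcagt with the
             // operand order reversed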
   1723   case ARM::BI__builtin_neon_vcage_v: {
   1724     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
   1725     return EmitNeonCall(F, Ops, "vcage");
   1726   }
   1727   case ARM::BI__builtin_neon_vcaleq_v:
   1728     std::swap(Ops[0], Ops[1]);
   1729   case ARM::BI__builtin_neon_vcageq_v: {
   1730     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
   1731     return EmitNeonCall(F, Ops, "vcage");
   1732   }
   1733   case ARM::BI__builtin_neon_vcalt_v:
   1734     std::swap(Ops[0], Ops[1]);
   1735   case ARM::BI__builtin_neon_vcagt_v: {
   1736     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
   1737     return EmitNeonCall(F, Ops, "vcagt");
   1738   }
   1739   case ARM::BI__builtin_neon_vcaltq_v:
   1740     std::swap(Ops[0], Ops[1]);
   1741   case ARM::BI__builtin_neon_vcagtq_v: {
   1742     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
   1743     return EmitNeonCall(F, Ops, "vcagt");
   1744   }
   1745   case ARM::BI__builtin_neon_vcls_v:
   1746   case ARM::BI__builtin_neon_vclsq_v: {
   1747     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
   1748     return EmitNeonCall(F, Ops, "vcls");
   1749   }
   1750   case ARM::BI__builtin_neon_vclz_v:
   1751   case ARM::BI__builtin_neon_vclzq_v: {
    1752     // Generate a target-independent intrinsic; we also need a second argument
    1753     // indicating whether clz of zero is undefined. On ARM it is not.
   1754     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ty);
   1755     Ops.push_back(Builder.getInt1(Target.isCLZForZeroUndef()));
   1756     return EmitNeonCall(F, Ops, "vclz");
   1757   }
   1758   case ARM::BI__builtin_neon_vcnt_v:
   1759   case ARM::BI__builtin_neon_vcntq_v: {
    1760     // Generate the target-independent ctpop intrinsic.
   1761     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, Ty);
   1762     return EmitNeonCall(F, Ops, "vctpop");
   1763   }
   1764   case ARM::BI__builtin_neon_vcvt_f16_v: {
   1765     assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
   1766            "unexpected vcvt_f16_v builtin");
   1767     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
   1768     return EmitNeonCall(F, Ops, "vcvt");
   1769   }
   1770   case ARM::BI__builtin_neon_vcvt_f32_f16: {
   1771     assert(Type.getEltType() == NeonTypeFlags::Float16 && !quad &&
   1772            "unexpected vcvt_f32_f16 builtin");
   1773     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
   1774     return EmitNeonCall(F, Ops, "vcvt");
   1775   }
   1776   case ARM::BI__builtin_neon_vcvt_f32_v:
   1777   case ARM::BI__builtin_neon_vcvtq_f32_v:
   1778     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1779     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
   1780     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
   1781                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   1782   case ARM::BI__builtin_neon_vcvt_s32_v:
   1783   case ARM::BI__builtin_neon_vcvt_u32_v:
   1784   case ARM::BI__builtin_neon_vcvtq_s32_v:
   1785   case ARM::BI__builtin_neon_vcvtq_u32_v: {
   1786     llvm::Type *FloatTy =
   1787       GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
   1788     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
   1789     return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
   1790                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   1791   }
   1792   case ARM::BI__builtin_neon_vcvt_n_f32_v:
   1793   case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
   1794     llvm::Type *FloatTy =
   1795       GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
   1796     llvm::Type *Tys[2] = { FloatTy, Ty };
   1797     Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp
   1798                : Intrinsic::arm_neon_vcvtfxs2fp;
   1799     Function *F = CGM.getIntrinsic(Int, Tys);
   1800     return EmitNeonCall(F, Ops, "vcvt_n");
   1801   }
   1802   case ARM::BI__builtin_neon_vcvt_n_s32_v:
   1803   case ARM::BI__builtin_neon_vcvt_n_u32_v:
   1804   case ARM::BI__builtin_neon_vcvtq_n_s32_v:
   1805   case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
   1806     llvm::Type *FloatTy =
   1807       GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, quad));
   1808     llvm::Type *Tys[2] = { Ty, FloatTy };
   1809     Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu
   1810                : Intrinsic::arm_neon_vcvtfp2fxs;
   1811     Function *F = CGM.getIntrinsic(Int, Tys);
   1812     return EmitNeonCall(F, Ops, "vcvt_n");
   1813   }
   1814   case ARM::BI__builtin_neon_vext_v:
   1815   case ARM::BI__builtin_neon_vextq_v: {
   1816     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
   1817     SmallVector<Constant*, 16> Indices;
   1818     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
   1819       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
   1820 
   1821     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1822     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1823     Value *SV = llvm::ConstantVector::get(Indices);
   1824     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
   1825   }
   1826   case ARM::BI__builtin_neon_vhadd_v:
   1827   case ARM::BI__builtin_neon_vhaddq_v:
   1828     Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
   1829     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
   1830   case ARM::BI__builtin_neon_vhsub_v:
   1831   case ARM::BI__builtin_neon_vhsubq_v:
   1832     Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
   1833     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
   1834   case ARM::BI__builtin_neon_vld1_v:
   1835   case ARM::BI__builtin_neon_vld1q_v:
   1836     Ops.push_back(Align);
   1837     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
   1838                         Ops, "vld1");
   1839   case ARM::BI__builtin_neon_vld1q_lane_v:
   1840     // Handle 64-bit integer elements as a special case.  Use shuffles of
   1841     // one-element vectors to avoid poor code for i64 in the backend.
   1842     if (VTy->getElementType()->isIntegerTy(64)) {
   1843       // Extract the other lane.
   1844       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1845       int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
   1846       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
   1847       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
   1848       // Load the value as a one-element vector.
   1849       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
   1850       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
   1851       Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
   1852       // Combine them.
   1853       SmallVector<Constant*, 2> Indices;
   1854       Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
   1855       Indices.push_back(ConstantInt::get(Int32Ty, Lane));
   1856       SV = llvm::ConstantVector::get(Indices);
   1857       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
   1858     }
   1859     // fall through
   1860   case ARM::BI__builtin_neon_vld1_lane_v: {
   1861     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1862     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
   1863     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1864     LoadInst *Ld = Builder.CreateLoad(Ops[0]);
   1865     Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
   1866     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
   1867   }
   1868   case ARM::BI__builtin_neon_vld1_dup_v:
   1869   case ARM::BI__builtin_neon_vld1q_dup_v: {
   1870     Value *V = UndefValue::get(Ty);
   1871     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
   1872     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1873     LoadInst *Ld = Builder.CreateLoad(Ops[0]);
   1874     Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
   1875     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
   1876     Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
   1877     return EmitNeonSplat(Ops[0], CI);
   1878   }
   1879   case ARM::BI__builtin_neon_vld2_v:
   1880   case ARM::BI__builtin_neon_vld2q_v: {
   1881     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
   1882     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
   1883     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1884     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1885     return Builder.CreateStore(Ops[1], Ops[0]);
   1886   }
   1887   case ARM::BI__builtin_neon_vld3_v:
   1888   case ARM::BI__builtin_neon_vld3q_v: {
   1889     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
   1890     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
   1891     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1892     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1893     return Builder.CreateStore(Ops[1], Ops[0]);
   1894   }
   1895   case ARM::BI__builtin_neon_vld4_v:
   1896   case ARM::BI__builtin_neon_vld4q_v: {
   1897     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
   1898     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
   1899     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1900     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1901     return Builder.CreateStore(Ops[1], Ops[0]);
   1902   }
   1903   case ARM::BI__builtin_neon_vld2_lane_v:
   1904   case ARM::BI__builtin_neon_vld2q_lane_v: {
   1905     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
   1906     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1907     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1908     Ops.push_back(Align);
   1909     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
   1910     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1911     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1912     return Builder.CreateStore(Ops[1], Ops[0]);
   1913   }
   1914   case ARM::BI__builtin_neon_vld3_lane_v:
   1915   case ARM::BI__builtin_neon_vld3q_lane_v: {
   1916     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
   1917     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1918     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1919     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
   1920     Ops.push_back(Align);
   1921     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
   1922     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1923     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1924     return Builder.CreateStore(Ops[1], Ops[0]);
   1925   }
   1926   case ARM::BI__builtin_neon_vld4_lane_v:
   1927   case ARM::BI__builtin_neon_vld4q_lane_v: {
   1928     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
   1929     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1930     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1931     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
   1932     Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
   1933     Ops.push_back(Align);
    1934     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
   1935     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1936     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1937     return Builder.CreateStore(Ops[1], Ops[0]);
   1938   }
   1939   case ARM::BI__builtin_neon_vld2_dup_v:
   1940   case ARM::BI__builtin_neon_vld3_dup_v:
   1941   case ARM::BI__builtin_neon_vld4_dup_v: {
    1942     // Handle 64-bit elements as a special case.  There is no "dup" needed.
   1943     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
   1944       switch (BuiltinID) {
   1945       case ARM::BI__builtin_neon_vld2_dup_v:
   1946         Int = Intrinsic::arm_neon_vld2;
   1947         break;
   1948       case ARM::BI__builtin_neon_vld3_dup_v:
   1949         Int = Intrinsic::arm_neon_vld3;
   1950         break;
   1951       case ARM::BI__builtin_neon_vld4_dup_v:
   1952         Int = Intrinsic::arm_neon_vld4;
   1953         break;
   1954       default: llvm_unreachable("unknown vld_dup intrinsic?");
   1955       }
   1956       Function *F = CGM.getIntrinsic(Int, Ty);
   1957       Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
   1958       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1959       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1960       return Builder.CreateStore(Ops[1], Ops[0]);
   1961     }
   1962     switch (BuiltinID) {
   1963     case ARM::BI__builtin_neon_vld2_dup_v:
   1964       Int = Intrinsic::arm_neon_vld2lane;
   1965       break;
   1966     case ARM::BI__builtin_neon_vld3_dup_v:
   1967       Int = Intrinsic::arm_neon_vld3lane;
   1968       break;
   1969     case ARM::BI__builtin_neon_vld4_dup_v:
   1970       Int = Intrinsic::arm_neon_vld4lane;
   1971       break;
   1972     default: llvm_unreachable("unknown vld_dup intrinsic?");
   1973     }
   1974     Function *F = CGM.getIntrinsic(Int, Ty);
   1975     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
   1976 
   1977     SmallVector<Value*, 6> Args;
   1978     Args.push_back(Ops[1]);
   1979     Args.append(STy->getNumElements(), UndefValue::get(Ty));
   1980 
   1981     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
   1982     Args.push_back(CI);
   1983     Args.push_back(Align);
   1984 
   1985     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    1986     // Splat lane 0 to all elements in each vector of the result.
   1987     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
   1988       Value *Val = Builder.CreateExtractValue(Ops[1], i);
   1989       Value *Elt = Builder.CreateBitCast(Val, Ty);
   1990       Elt = EmitNeonSplat(Elt, CI);
   1991       Elt = Builder.CreateBitCast(Elt, Val->getType());
   1992       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
   1993     }
   1994     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1995     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1996     return Builder.CreateStore(Ops[1], Ops[0]);
   1997   }
   1998   case ARM::BI__builtin_neon_vmax_v:
   1999   case ARM::BI__builtin_neon_vmaxq_v:
   2000     Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
   2001     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
   2002   case ARM::BI__builtin_neon_vmin_v:
   2003   case ARM::BI__builtin_neon_vminq_v:
   2004     Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
   2005     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
   2006   case ARM::BI__builtin_neon_vmovl_v: {
    2007     llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
   2008     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
   2009     if (usgn)
   2010       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
   2011     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
   2012   }
   2013   case ARM::BI__builtin_neon_vmovn_v: {
   2014     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
   2015     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
   2016     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
   2017   }
   2018   case ARM::BI__builtin_neon_vmul_v:
   2019   case ARM::BI__builtin_neon_vmulq_v:
   2020     assert(Type.isPoly() && "vmul builtin only supported for polynomial types");
   2021     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
   2022                         Ops, "vmul");
   2023   case ARM::BI__builtin_neon_vmull_v:
   2024     Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
   2025     Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
   2026     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
   2027   case ARM::BI__builtin_neon_vpadal_v:
   2028   case ARM::BI__builtin_neon_vpadalq_v: {
   2029     Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
   2030     // The source operand type has twice as many elements of half the size.
   2031     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
   2032     llvm::Type *EltTy =
   2033       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
   2034     llvm::Type *NarrowTy =
   2035       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
   2036     llvm::Type *Tys[2] = { Ty, NarrowTy };
   2037     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
   2038   }
   2039   case ARM::BI__builtin_neon_vpadd_v:
   2040     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
   2041                         Ops, "vpadd");
   2042   case ARM::BI__builtin_neon_vpaddl_v:
   2043   case ARM::BI__builtin_neon_vpaddlq_v: {
   2044     Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
   2045     // The source operand type has twice as many elements of half the size.
   2046     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
   2047     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
   2048     llvm::Type *NarrowTy =
   2049       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
   2050     llvm::Type *Tys[2] = { Ty, NarrowTy };
   2051     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
   2052   }
   2053   case ARM::BI__builtin_neon_vpmax_v:
   2054     Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
   2055     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
   2056   case ARM::BI__builtin_neon_vpmin_v:
   2057     Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
   2058     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
   2059   case ARM::BI__builtin_neon_vqabs_v:
   2060   case ARM::BI__builtin_neon_vqabsq_v:
   2061     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
   2062                         Ops, "vqabs");
   2063   case ARM::BI__builtin_neon_vqadd_v:
   2064   case ARM::BI__builtin_neon_vqaddq_v:
   2065     Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
   2066     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
   2067   case ARM::BI__builtin_neon_vqdmlal_v:
   2068     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
   2069                         Ops, "vqdmlal");
   2070   case ARM::BI__builtin_neon_vqdmlsl_v:
   2071     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
   2072                         Ops, "vqdmlsl");
   2073   case ARM::BI__builtin_neon_vqdmulh_v:
   2074   case ARM::BI__builtin_neon_vqdmulhq_v:
   2075     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
   2076                         Ops, "vqdmulh");
   2077   case ARM::BI__builtin_neon_vqdmull_v:
   2078     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
   2079                         Ops, "vqdmull");
   2080   case ARM::BI__builtin_neon_vqmovn_v:
   2081     Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
   2082     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
   2083   case ARM::BI__builtin_neon_vqmovun_v:
   2084     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
    2085                         Ops, "vqmovun");
   2086   case ARM::BI__builtin_neon_vqneg_v:
   2087   case ARM::BI__builtin_neon_vqnegq_v:
   2088     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
   2089                         Ops, "vqneg");
   2090   case ARM::BI__builtin_neon_vqrdmulh_v:
   2091   case ARM::BI__builtin_neon_vqrdmulhq_v:
   2092     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
   2093                         Ops, "vqrdmulh");
   2094   case ARM::BI__builtin_neon_vqrshl_v:
   2095   case ARM::BI__builtin_neon_vqrshlq_v:
   2096     Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
   2097     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
   2098   case ARM::BI__builtin_neon_vqrshrn_n_v:
   2099     Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
   2100     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
   2101                         1, true);
   2102   case ARM::BI__builtin_neon_vqrshrun_n_v:
   2103     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
   2104                         Ops, "vqrshrun_n", 1, true);
   2105   case ARM::BI__builtin_neon_vqshl_v:
   2106   case ARM::BI__builtin_neon_vqshlq_v:
   2107     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
   2108     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
   2109   case ARM::BI__builtin_neon_vqshl_n_v:
   2110   case ARM::BI__builtin_neon_vqshlq_n_v:
   2111     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
   2112     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
   2113                         1, false);
   2114   case ARM::BI__builtin_neon_vqshlu_n_v:
   2115   case ARM::BI__builtin_neon_vqshluq_n_v:
   2116     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
   2117                         Ops, "vqshlu", 1, false);
   2118   case ARM::BI__builtin_neon_vqshrn_n_v:
   2119     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
   2120     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
   2121                         1, true);
   2122   case ARM::BI__builtin_neon_vqshrun_n_v:
   2123     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
   2124                         Ops, "vqshrun_n", 1, true);
   2125   case ARM::BI__builtin_neon_vqsub_v:
   2126   case ARM::BI__builtin_neon_vqsubq_v:
   2127     Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
   2128     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
   2129   case ARM::BI__builtin_neon_vraddhn_v:
   2130     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
   2131                         Ops, "vraddhn");
   2132   case ARM::BI__builtin_neon_vrecpe_v:
   2133   case ARM::BI__builtin_neon_vrecpeq_v:
   2134     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
   2135                         Ops, "vrecpe");
   2136   case ARM::BI__builtin_neon_vrecps_v:
   2137   case ARM::BI__builtin_neon_vrecpsq_v:
   2138     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
   2139                         Ops, "vrecps");
   2140   case ARM::BI__builtin_neon_vrhadd_v:
   2141   case ARM::BI__builtin_neon_vrhaddq_v:
   2142     Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
   2143     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
   2144   case ARM::BI__builtin_neon_vrshl_v:
   2145   case ARM::BI__builtin_neon_vrshlq_v:
   2146     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   2147     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
   2148   case ARM::BI__builtin_neon_vrshrn_n_v:
   2149     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
   2150                         Ops, "vrshrn_n", 1, true);
   2151   case ARM::BI__builtin_neon_vrshr_n_v:
   2152   case ARM::BI__builtin_neon_vrshrq_n_v:
   2153     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   2154     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
   2155   case ARM::BI__builtin_neon_vrsqrte_v:
   2156   case ARM::BI__builtin_neon_vrsqrteq_v:
   2157     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
   2158                         Ops, "vrsqrte");
   2159   case ARM::BI__builtin_neon_vrsqrts_v:
   2160   case ARM::BI__builtin_neon_vrsqrtsq_v:
   2161     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
   2162                         Ops, "vrsqrts");
   2163   case ARM::BI__builtin_neon_vrsra_n_v:
   2164   case ARM::BI__builtin_neon_vrsraq_n_v:
   2165     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   2166     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   2167     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
   2168     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   2169     Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
   2170     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
   2171   case ARM::BI__builtin_neon_vrsubhn_v:
   2172     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
   2173                         Ops, "vrsubhn");
   2174   case ARM::BI__builtin_neon_vshl_v:
   2175   case ARM::BI__builtin_neon_vshlq_v:
   2176     Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
   2177     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
   2178   case ARM::BI__builtin_neon_vshll_n_v:
   2179     Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
   2180     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
   2181   case ARM::BI__builtin_neon_vshl_n_v:
   2182   case ARM::BI__builtin_neon_vshlq_n_v:
   2183     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
   2184     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
   2185   case ARM::BI__builtin_neon_vshrn_n_v:
   2186     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
   2187                         Ops, "vshrn_n", 1, true);
   2188   case ARM::BI__builtin_neon_vshr_n_v:
   2189   case ARM::BI__builtin_neon_vshrq_n_v:
   2190     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   2191     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
   2192     if (usgn)
   2193       return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
   2194     else
   2195       return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
   2196   case ARM::BI__builtin_neon_vsri_n_v:
   2197   case ARM::BI__builtin_neon_vsriq_n_v:
   2198     rightShift = true;
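             // fall through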
   2199   case ARM::BI__builtin_neon_vsli_n_v:
   2200   case ARM::BI__builtin_neon_vsliq_n_v:
   2201     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
   2202     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
   2203                         Ops, "vsli_n");
   2204   case ARM::BI__builtin_neon_vsra_n_v:
   2205   case ARM::BI__builtin_neon_vsraq_n_v:
   2206     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   2207     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   2208     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
   2209     if (usgn)
   2210       Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
   2211     else
   2212       Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
   2213     return Builder.CreateAdd(Ops[0], Ops[1]);
   2214   case ARM::BI__builtin_neon_vst1_v:
   2215   case ARM::BI__builtin_neon_vst1q_v:
   2216     Ops.push_back(Align);
   2217     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
   2218                         Ops, "");
   2219   case ARM::BI__builtin_neon_vst1q_lane_v:
   2220     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
   2221     // a one-element vector and avoid poor code for i64 in the backend.
   2222     if (VTy->getElementType()->isIntegerTy(64)) {
   2223       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   2224       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
   2225       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
   2226       Ops[2] = Align;
   2227       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
   2228                                                  Ops[1]->getType()), Ops);
   2229     }
   2230     // fall through
   2231   case ARM::BI__builtin_neon_vst1_lane_v: {
   2232     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   2233     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
   2234     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   2235     StoreInst *St = Builder.CreateStore(Ops[1],
   2236                                         Builder.CreateBitCast(Ops[0], Ty));
   2237     St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
   2238     return St;
   2239   }
   2240   case ARM::BI__builtin_neon_vst2_v:
   2241   case ARM::BI__builtin_neon_vst2q_v:
   2242     Ops.push_back(Align);
   2243     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
   2244                         Ops, "");
   2245   case ARM::BI__builtin_neon_vst2_lane_v:
   2246   case ARM::BI__builtin_neon_vst2q_lane_v:
   2247     Ops.push_back(Align);
   2248     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
   2249                         Ops, "");
   2250   case ARM::BI__builtin_neon_vst3_v:
   2251   case ARM::BI__builtin_neon_vst3q_v:
   2252     Ops.push_back(Align);
   2253     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
   2254                         Ops, "");
   2255   case ARM::BI__builtin_neon_vst3_lane_v:
   2256   case ARM::BI__builtin_neon_vst3q_lane_v:
   2257     Ops.push_back(Align);
   2258     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
   2259                         Ops, "");
   2260   case ARM::BI__builtin_neon_vst4_v:
   2261   case ARM::BI__builtin_neon_vst4q_v:
   2262     Ops.push_back(Align);
   2263     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
   2264                         Ops, "");
   2265   case ARM::BI__builtin_neon_vst4_lane_v:
   2266   case ARM::BI__builtin_neon_vst4q_lane_v:
   2267     Ops.push_back(Align);
   2268     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
   2269                         Ops, "");
   2270   case ARM::BI__builtin_neon_vsubhn_v:
   2271     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
   2272                         Ops, "vsubhn");
  case ARM::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case ARM::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case ARM::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case ARM::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case ARM::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case ARM::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case ARM::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case ARM::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  case ARM::BI__builtin_neon_vtst_v:
  case ARM::BI__builtin_neon_vtstq_v: {
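    // vtst sets each output lane to all-ones when (a & b) is nonzero in that
    // lane: AND the inputs, compare against zero, and sign-extend the i1
    // results back to the element width.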
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case ARM::BI__builtin_neon_vtrn_v:
  case ARM::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

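    // One pass per output register. For a 4-element vector the two shuffle
    // masks are <0,4,2,6> and <1,5,3,7>: the even (then odd) lanes of the
    // two inputs interleaved, giving the transposed pair VTRN produces.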
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vuzp_v:
  case ARM::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

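    // For a 4-element vector the two shuffle masks are <0,2,4,6> and
    // <1,3,5,7>: first the even-indexed, then the odd-indexed, elements of
    // the concatenated inputs.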
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case ARM::BI__builtin_neon_vzip_v:
  case ARM::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = 0;

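    // For a 4-element vector the two shuffle masks are <0,4,1,5> and
    // <2,6,3,7>: the low halves, then the high halves, of the two inputs
    // interleaved element by element.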
    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
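  // Note: (n & (n - 1)) == 0 holds exactly for powers of two (and for zero),
  // e.g. 8 & 7 == 0 while 6 & 5 == 4.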
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

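  // Bit i of ICEArguments is set when argument i of the builtin must be an
  // integer constant expression (e.g. the immediate byte of palignr below).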
  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return 0;
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
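  // The SSE ldmxcsr/stmxcsr intrinsics take an i8* pointing at the 32-bit
  // MXCSR value, so both cases below go through a stack temporary.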
  case X86::BI__builtin_ia32_ldmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    llvm::Type *PtrTy = Int8PtrTy;
    Value *One = llvm::ConstantInt::get(Int32Ty, 1);
    Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast the value to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
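  // PALIGNR concatenates the two inputs, shifts the pair right by an
  // immediate byte count, and keeps the low half. With 8-byte vectors and
  // shiftVal == 3, result byte i is byte 3 + i of the concatenation
  // (Ops[1] low, Ops[0] high), which is exactly what the shuffle mask
  // below selects.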
  case X86::BI__builtin_ia32_palignr: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors by 8 bytes or fewer,
    // emit a shuffle instruction.
    if (shiftVal <= 8) {
      SmallVector<llvm::Constant*, 8> Indices;
      for (unsigned i = 0; i != 8; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 8 but less
    // than 16 bytes, emit a logical right shift of the destination.
    if (shiftVal < 16) {
      // MMX has these as 1 x i64 vectors for some odd optimization reasons.
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);

      // Shift the destination right by the remaining bit count.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors by 16 bytes or more,
    // emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr128: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors by 16 bytes or fewer,
    // emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 16> Indices;
      for (unsigned i = 0; i != 16; ++i)
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting the pair of input vectors more than 16 but less
    // than 32 bytes, emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // Shift the destination right by the remaining bit count.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting the pair of vectors by 32 bytes or more,
    // emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
  case X86::BI__builtin_ia32_palignr256: {
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    // If palignr is shifting the pair of input vectors by 16 bytes or fewer
    // per lane, emit a shuffle instruction.
    if (shiftVal <= 16) {
      SmallVector<llvm::Constant*, 32> Indices;
      // 256-bit palignr operates independently on the two 128-bit lanes, so
      // build the shuffle mask lane by lane; an index that runs off the end
      // of a lane wraps into the same lane of the second operand.
      for (unsigned l = 0; l != 2; ++l) {
        unsigned LaneStart = l * 16;
        unsigned LaneEnd = (l+1) * 16;
        for (unsigned i = 0; i != 16; ++i) {
          unsigned Idx = shiftVal + i + LaneStart;
          if (Idx >= LaneEnd) Idx += 16; // end of lane, switch operand
          Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx));
        }
      }

      Value* SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
    }

    // If palignr is shifting each lane more than 16 but less than 32 bytes,
    // emit a logical right shift of the destination.
    if (shiftVal < 32) {
      llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 4);

      Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
      Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);

      // Shift each lane of the destination right by the remaining bit count.
      llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_avx2_psrl_dq);
      return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
    }

    // If palignr is shifting by 32 bytes or more, emit zero.
    return llvm::Constant::getNullValue(ConvertType(E->getType()));
  }
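  // Non-temporal stores are emitted as ordinary stores tagged with
  // !nontemporal metadata, roughly:
  //   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0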
  case X86::BI__builtin_ia32_movntps:
  case X86::BI__builtin_ia32_movntps256:
  case X86::BI__builtin_ia32_movntpd:
  case X86::BI__builtin_ia32_movntpd256:
  case X86::BI__builtin_ia32_movntdq:
  case X86::BI__builtin_ia32_movntdq256:
  case X86::BI__builtin_ia32_movnti: {
    llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
                                           Builder.getInt32(1));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
    SI->setAlignment(16);
    return SI;
  }
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    const char *name = 0;
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_pswapdsf:
    case X86::BI__builtin_ia32_pswapdsi:
      name = "pswapd";
      ID = Intrinsic::x86_3dnowa_pswapd;
      break;
    }
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
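  // The rdrand step builtins write the random value through their pointer
  // argument and return the success flag: the intrinsic yields a pair whose
  // element 0 is stored to *Ops[0] and whose element 1 (i1) is returned.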
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  }
}

Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return 0;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  {
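    // These builtins take (offset, pointer) while the AltiVec intrinsics
    // take a single pointer, so fold the byte offset into an i8* GEP and
    // drop the extra operand.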
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  {
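    // Same folding as the loads above: combine the byte offset and the
    // pointer into one i8* operand before calling the store intrinsic.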
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  }
}