//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "TargetInfo.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "CGObjCRuntime.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/Intrinsics.h"
#include "llvm/Target/TargetData.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10; // Skip "__builtin_".

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}
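
// Example: for __builtin_fabsf, GetName() returns "__builtin_fabsf";
// skipping the 10-character "__builtin_" prefix yields "fabsf", so the
// call is emitted against the libm function directly.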

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

/// Emit the conversions required to turn an integer of the given size
/// back into a value of the original type.
static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
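
// A sketch of what EmitBinaryAtomic produces: for
//   int x; ... __sync_fetch_and_add(&x, 1);
// it emits, roughly,
//   %old = atomicrmw add i32* %x.addr, i32 1 seq_cst
// and returns %old, the value the memory held before the operation
// (value names here are illustrative).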

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace =
    cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
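
// Sketch for the post-op variant: __sync_add_and_fetch(&x, 1) emits the
// same atomicrmw and then re-applies the operation to recover the new value:
//   %old = atomicrmw add i32* %x.addr, i32 1 seq_cst
//   %new = add i32 %old, 1
// which is why Op must mirror the atomicrmw Kind (names illustrative).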

/// EmitFAbs - Emit a call to fabs/fabsf/fabsl, depending on the type of ValTy,
/// which must be a scalar floating point type.
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V, QualType ValTy) {
  const BuiltinType *ValTyP = ValTy->getAs<BuiltinType>();
  assert(ValTyP && "isn't scalar fp type!");

  StringRef FnName;
  switch (ValTyP->getKind()) {
  default: llvm_unreachable("Isn't a scalar fp type!");
  case BuiltinType::Float:      FnName = "fabsf"; break;
  case BuiltinType::Double:     FnName = "fabs"; break;
  case BuiltinType::LongDouble: FnName = "fabsl"; break;
  }

  // The prototype is something that takes and returns whatever V's type is.
  llvm::FunctionType *FT = llvm::FunctionType::get(V->getType(), V->getType(),
                                                   false);
  llvm::Value *Fn = CGF.CGM.CreateRuntimeFunction(FT, FnName);

  return CGF.Builder.CreateCall(Fn, V, "abs");
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue,
                      ReturnValueSlot(), E->arg_begin(), E->arg_end(), Fn);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E) {
  // See if we can constant fold this builtin.  If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->Evaluate(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), 0));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
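  // The abs lowering above is branchless; for an int argument it is roughly:
  //   %neg = sub i32 0, %x
  //   %abscond = icmp sge i32 %x, 0
  //   %abs = select i1 %abscond, i32 %x, i32 %neg
  // (names illustrative).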
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, ArgValue),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));

    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only distinguishes the maximum (types 0/1) from the minimum
    // (types 2/3) query, so pass bit 1 of the type along as a boolean.
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);

    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, ResType);
    return RValue::get(Builder.CreateCall2(F,
                                           EmitScalarExpr(E->getArg(0)),
                                           CI));
  }
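  // Sketch of the objectsize lowering above: __builtin_object_size(p, 2)
  // becomes roughly
  //   %size = call i64 @llvm.objectsize.i64(i8* %p, i1 true)
  // where the i1 requests the minimum (true) or maximum (false) estimate
  // (overload shown for a 64-bit result type).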
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (CatchUndefined)
      EmitBranch(getTrapBB());
    else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(0);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V, E->getArg(0)->getType());

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  // TODO: BI__builtin_isinf_sign
  //   isinf_sign(x) -> isinf(x) ? (signbit(x) ? -1 : 1) : 0

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V, E->getArg(0)->getType());
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V, E->getArg(5)->getType());
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Address, Builder.getInt8(0), SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemCpy(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }
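  // For example, __builtin___memcpy_chk(d, s, 4, 16) has a constant copy
  // size (4) no larger than the constant destination size (16), so the fold
  // above turns it into a plain memcpy; otherwise we break out of the switch
  // and the call is emitted against the library's __memcpy_chk.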

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Dest = EmitScalarExpr(E->getArg(0));
    Value *Src = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, 1, false);
    return RValue::get(Dest);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemMove(Address, SrcAddr, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);
    return RValue::get(Address);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Address, ByteVal, SizeVal, 1, false);

    return RValue::get(Address);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
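  // Buffer layout sketch for the setjmp lowering above: slot 0 holds the
  // frame address and slot 2 the saved stack pointer (both stored explicitly
  // here); llvm.eh.sjlj.setjmp is expected to fill in the resume address
  // itself (an assumption about the intrinsic, not something this file
  // states).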
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(0);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace =
      cast<llvm::PointerType>(DestPtr->getType())->getAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *OldVal = Args[1];
    Value *PrevVal = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                 llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateICmpEQ(PrevVal, OldVal);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }
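  // Sketch: the bool variant above reuses the same cmpxchg and derives
  // success by comparing the value found in memory against the expected one:
  //   %prev = cmpxchg i32* %ptr, i32 %expected, i32 %new seq_cst
  //   %ok = icmp eq i32 %prev, %expected
  // (single-ordering cmpxchg syntax of this LLVM era; names illustrative).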

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    // __sync_lock_release is an atomic store of 0 with release ordering.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    llvm::Type *ElLLVMTy =
      cast<llvm::PointerType>(Ptr->getType())->getElementType();
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ElLLVMTy), Ptr);
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(0);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(0);
  }

  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence: {
    llvm::SynchronizationScope Scope;
    if (BuiltinID == Builtin::BI__atomic_signal_fence)
      Scope = llvm::SingleThread;
    else
      Scope = llvm::CrossThread;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::Acquire, Scope);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::Release, Scope);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AcquireRelease, Scope);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
        break;
      }
      return RValue::get(0);
    }

    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(0);
  }
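  // Example: __atomic_thread_fence(memory_order_acquire) with a constant
  // order folds to a single "fence acquire"; a non-constant order produces
  // the switch above, selecting the fence strength at run time (orders 1
  // and 2, consume and acquire, share the acquire block).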

    // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // TODO: there is currently no set of optimizer flags
    // sufficient for us to rewrite sqrt to @llvm.sqrt.
    // -fmath-errno=0 is not good enough; we need finiteness.
    // We could probably precondition the call with an ult
    // against 0, but is that worth the complexity?
    break;
  }

  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Rewrite pow to intrinsic if allowed.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(Builder.CreateCall3(F, FirstArg,
                                           EmitScalarExpr(E->getArg(1)),
                                           EmitScalarExpr(E->getArg(2))));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    // signbit(x) -> (bitcast x to iN) < 0, i.e. test the sign bit directly.
    LLVMContext &C = CGM.getLLVMContext();

    Value *Arg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgTy = Arg->getType();
    if (ArgTy->isPPC_FP128Ty())
      break; // FIXME: I'm not sure what the right implementation is here.
    int ArgWidth = ArgTy->getPrimitiveSizeInBits();
    llvm::Type *ArgIntTy = llvm::IntegerType::get(C, ArgWidth);
    Value *BCArg = Builder.CreateBitCast(Arg, ArgIntTy);
    Value *ZeroCmp = llvm::Constant::getNullValue(ArgIntTy);
    Value *Result = Builder.CreateICmpSLT(BCArg, ZeroCmp);
    return RValue::get(Builder.CreateZExt(Result, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, going through casts. Sema requires this to
    // be a non-wide string literal, potentially cast, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    llvm::StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  }
  }

  // If this is an alias for a lib function (e.g. __builtin_sin), emit
  // the call using the normal call path, but using the unmangled
  // version of the function name.
  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E,
                           CGM.getBuiltinLibFunction(FD, BuiltinID));

  // If this is a predefined lib function (e.g. malloc), emit the call
  // using exactly the normal call path.
  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
    return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));

  // See if we have a target specific intrinsic.
  const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
  if (const char *Prefix =
      llvm::Triple::getArchTypePrefix(Target.getTriple().getArch()))
    IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);

  if (IntrinsicID != Intrinsic::not_intrinsic) {
    SmallVector<Value*, 16> Args;

    // Find out if any arguments are required to be integer constant
    // expressions.
    unsigned ICEArguments = 0;
    ASTContext::GetBuiltinTypeError Error;
    getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
    assert(Error == ASTContext::GE_None && "Should not codegen an error");

    Function *F = CGM.getIntrinsic(IntrinsicID);
    llvm::FunctionType *FTy = F->getFunctionType();

    for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
      Value *ArgValue;
      // If this is a normal argument, just emit it as a scalar.
      if ((ICEArguments & (1 << i)) == 0) {
        ArgValue = EmitScalarExpr(E->getArg(i));
      } else {
        // If this is required to be a constant, constant fold it so that we
        // know that the generated intrinsic gets a ConstantInt.
        llvm::APSInt Result;
        bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
        assert(IsConst && "Constant arg isn't actually constant?");
        (void)IsConst;
        ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
      }

      // If the intrinsic arg type is different from the builtin arg type
      // we need to do a bit cast.
      llvm::Type *PTy = FTy->getParamType(i);
      if (PTy != ArgValue->getType()) {
        assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
               "Must be able to losslessly bit cast to param");
        ArgValue = Builder.CreateBitCast(ArgValue, PTy);
      }

      Args.push_back(ArgValue);
    }

    Value *V = Builder.CreateCall(F, Args);
    QualType BuiltinRetType = E->getType();

    llvm::Type *RetTy = llvm::Type::getVoidTy(getLLVMContext());
    if (!BuiltinRetType->isVoidType()) RetTy = ConvertType(BuiltinRetType);

    if (RetTy != V->getType()) {
      assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
             "Must be able to losslessly bit cast result type");
      V = Builder.CreateBitCast(V, RetTy);
    }

    return RValue::get(V);
  }

  // See if we have a target specific builtin that needs to be lowered.
  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
    return RValue::get(V);

  ErrorUnsupported(E, "builtin function");

  // Unknown builtin, for now just dump it out and return undef.
  if (hasAggregateLLVMType(E->getType()))
    return RValue::getAggregate(CreateMemTemp(E->getType()));
  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
}

Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E) {
  switch (Target.getTriple().getArch()) {
  case llvm::Triple::arm:
  case llvm::Triple::thumb:
    return EmitARMBuiltinExpr(BuiltinID, E);
  case llvm::Triple::x86:
  case llvm::Triple::x86_64:
    return EmitX86BuiltinExpr(BuiltinID, E);
  case llvm::Triple::ppc:
  case llvm::Triple::ppc64:
    return EmitPPCBuiltinExpr(BuiltinID, E);
  default:
    return 0;
  }
}
   1188 
   1189 static llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
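             // A sketch of the encoding (consumed by the callers below): type 2
             // selects i32 elements, so q=false gives <2 x i32> and q=true gives
             // <4 x i32>; type 4 selects float, so GetNeonType(C, 4, true) is
             // <4 x float>.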
   1190   switch (type) {
   1191     default: break;
   1192     case 0:
   1193     case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
   1194     case 6:
   1195     case 7:
   1196     case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
   1197     case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
   1198     case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
   1199     case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
    1200   }
   1201   return 0;
   1202 }
   1203 
   1204 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
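             // E.g. splatting lane 1 of a <4 x i32> value emits a shufflevector
             // with the constant mask <1, 1, 1, 1>.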
   1205   unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
   1206   SmallVector<Constant*, 16> Indices(nElts, C);
   1207   Value* SV = llvm::ConstantVector::get(Indices);
   1208   return Builder.CreateShuffleVector(V, V, SV, "lane");
   1209 }
   1210 
   1211 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
   1212                                      const char *name,
   1213                                      unsigned shift, bool rightshift) {
   1214   unsigned j = 0;
   1215   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
   1216        ai != ae; ++ai, ++j)
   1217     if (shift > 0 && shift == j)
   1218       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
   1219     else
   1220       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
   1221 
   1222   return Builder.CreateCall(F, Ops, name);
   1223 }
   1224 
   1225 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
   1226                                             bool neg) {
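             // E.g. a shift amount of 3 on <4 x i16> with neg=true yields the
             // constant <i16 -3, i16 -3, i16 -3, i16 -3>; the ARM shift
             // intrinsics encode right shifts as negative amounts.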
   1227   ConstantInt *CI = cast<ConstantInt>(V);
   1228   int SV = CI->getSExtValue();
   1229 
   1230   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
   1231   llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
   1232   SmallVector<llvm::Constant*, 16> CV(VTy->getNumElements(), C);
   1233   return llvm::ConstantVector::get(CV);
   1234 }
   1235 
   1236 /// GetPointeeAlignment - Given an expression with a pointer type, find the
   1237 /// alignment of the type referenced by the pointer.  Skip over implicit
   1238 /// casts.
   1239 static Value *GetPointeeAlignment(CodeGenFunction &CGF, const Expr *Addr) {
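             // E.g. for an int32_t* argument this typically returns the i32
             // constant 4 (the natural alignment of the pointee), looking
             // through implicit casts such as array-to-pointer decay.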
   1240   unsigned Align = 1;
   1241   // Check if the type is a pointer.  The implicit cast operand might not be.
   1242   while (Addr->getType()->isPointerType()) {
   1243     QualType PtTy = Addr->getType()->getPointeeType();
   1244     unsigned NewA = CGF.getContext().getTypeAlignInChars(PtTy).getQuantity();
   1245     if (NewA > Align)
   1246       Align = NewA;
   1247 
   1248     // If the address is an implicit cast, repeat with the cast operand.
   1249     if (const ImplicitCastExpr *CastAddr = dyn_cast<ImplicitCastExpr>(Addr)) {
   1250       Addr = CastAddr->getSubExpr();
   1251       continue;
   1252     }
   1253     break;
   1254   }
   1255   return llvm::ConstantInt::get(CGF.Int32Ty, Align);
   1256 }
   1257 
   1258 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
   1259                                            const CallExpr *E) {
   1260   if (BuiltinID == ARM::BI__clear_cache) {
   1261     const FunctionDecl *FD = E->getDirectCallee();
    1262     // Oddly, people sometimes write this call without arguments and gcc
    1263     // accepts it; it is also marked varargs in the builtin description file.
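               // A sketch of what gets emitted (hypothetical value names): a call
               //   __clear_cache(begin, end);
               // becomes a plain runtime call rather than an intrinsic:
               //   call void @__clear_cache(i8* %begin, i8* %end)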
   1264     SmallVector<Value*, 2> Ops;
   1265     for (unsigned i = 0; i < E->getNumArgs(); i++)
   1266       Ops.push_back(EmitScalarExpr(E->getArg(i)));
   1267     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
   1268     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
   1269     StringRef Name = FD->getName();
   1270     return Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
   1271   }
   1272 
   1273   if (BuiltinID == ARM::BI__builtin_arm_ldrexd) {
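               // Sketch of the expansion below: @llvm.arm.ldrexd returns a
               // { i32, i32 } pair that is widened and recombined, roughly
               //   %hi = zext i32 %pair.1 to i64
               //   %lo = zext i32 %pair.0 to i64
               //   %r  = or i64 (shl nuw i64 %hi, 32), %lo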
   1274     Function *F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
   1275 
   1276     Value *LdPtr = EmitScalarExpr(E->getArg(0));
   1277     Value *Val = Builder.CreateCall(F, LdPtr, "ldrexd");
   1278 
   1279     Value *Val0 = Builder.CreateExtractValue(Val, 1);
   1280     Value *Val1 = Builder.CreateExtractValue(Val, 0);
   1281     Val0 = Builder.CreateZExt(Val0, Int64Ty);
   1282     Val1 = Builder.CreateZExt(Val1, Int64Ty);
   1283 
   1284     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
   1285     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
   1286     return Builder.CreateOr(Val, Val1);
   1287   }
   1288 
   1289   if (BuiltinID == ARM::BI__builtin_arm_strexd) {
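               // Sketch: the i64 argument is spilled to a stack slot, reloaded
               // as two i32 halves, and passed along with the address, roughly
               //   %ok = call i32 @llvm.arm.strexd(i32 %lo, i32 %hi, i8* %p)
               // where a zero result means the store-exclusive succeeded.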
   1290     Function *F = CGM.getIntrinsic(Intrinsic::arm_strexd);
   1291     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, NULL);
   1292 
   1293     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
   1294     Value *Tmp = Builder.CreateAlloca(Int64Ty, One);
   1295     Value *Val = EmitScalarExpr(E->getArg(0));
   1296     Builder.CreateStore(Val, Tmp);
   1297 
   1298     Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
   1299     Val = Builder.CreateLoad(LdPtr);
   1300 
   1301     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
   1302     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
   1303     Value *StPtr = EmitScalarExpr(E->getArg(1));
   1304     return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
   1305   }
   1306 
   1307   SmallVector<Value*, 4> Ops;
   1308   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
   1309     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   1310 
   1311   // vget_lane and vset_lane are not overloaded and do not have an extra
   1312   // argument that specifies the vector type.
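             // E.g. (a sketch): vget_lane_i32(v, 1) becomes
             //   extractelement <2 x i32> %v, i32 1
             // and vset_lane_i32(x, v, 1) becomes
             //   insertelement <2 x i32> %v, i32 %x, i32 1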
   1313   switch (BuiltinID) {
   1314   default: break;
   1315   case ARM::BI__builtin_neon_vget_lane_i8:
   1316   case ARM::BI__builtin_neon_vget_lane_i16:
   1317   case ARM::BI__builtin_neon_vget_lane_i32:
   1318   case ARM::BI__builtin_neon_vget_lane_i64:
   1319   case ARM::BI__builtin_neon_vget_lane_f32:
   1320   case ARM::BI__builtin_neon_vgetq_lane_i8:
   1321   case ARM::BI__builtin_neon_vgetq_lane_i16:
   1322   case ARM::BI__builtin_neon_vgetq_lane_i32:
   1323   case ARM::BI__builtin_neon_vgetq_lane_i64:
   1324   case ARM::BI__builtin_neon_vgetq_lane_f32:
   1325     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
   1326                                         "vget_lane");
   1327   case ARM::BI__builtin_neon_vset_lane_i8:
   1328   case ARM::BI__builtin_neon_vset_lane_i16:
   1329   case ARM::BI__builtin_neon_vset_lane_i32:
   1330   case ARM::BI__builtin_neon_vset_lane_i64:
   1331   case ARM::BI__builtin_neon_vset_lane_f32:
   1332   case ARM::BI__builtin_neon_vsetq_lane_i8:
   1333   case ARM::BI__builtin_neon_vsetq_lane_i16:
   1334   case ARM::BI__builtin_neon_vsetq_lane_i32:
   1335   case ARM::BI__builtin_neon_vsetq_lane_i64:
   1336   case ARM::BI__builtin_neon_vsetq_lane_f32:
   1337     Ops.push_back(EmitScalarExpr(E->getArg(2)));
   1338     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
   1339   }
   1340 
   1341   // Get the last argument, which specifies the vector type.
   1342   llvm::APSInt Result;
   1343   const Expr *Arg = E->getArg(E->getNumArgs()-1);
   1344   if (!Arg->isIntegerConstantExpr(Result, getContext()))
   1345     return 0;
   1346 
   1347   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
   1348       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
   1349     // Determine the overloaded type of this builtin.
   1350     llvm::Type *Ty;
   1351     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
   1352       Ty = llvm::Type::getFloatTy(getLLVMContext());
   1353     else
   1354       Ty = llvm::Type::getDoubleTy(getLLVMContext());
   1355 
   1356     // Determine whether this is an unsigned conversion or not.
   1357     bool usgn = Result.getZExtValue() == 1;
   1358     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
   1359 
   1360     // Call the appropriate intrinsic.
   1361     Function *F = CGM.getIntrinsic(Int, Ty);
   1362     return Builder.CreateCall(F, Ops, "vcvtr");
   1363   }
   1364 
   1365   // Determine the type of this overloaded NEON intrinsic.
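             // A sketch of the encoding decoded below: bits 0-2 select the
             // element kind, bit 3 the signedness, and bit 4 the 128-bit "quad"
             // form, so e.g. 0x18 decodes as an unsigned quad of i8, <16 x i8>.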
   1366   unsigned type = Result.getZExtValue();
   1367   bool usgn = type & 0x08;
   1368   bool quad = type & 0x10;
   1369   bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
    1370   (void)poly;  // Used in asserts and for the vmull builtin below.
   1371   bool rightShift = false;
   1372 
   1373   llvm::VectorType *VTy = GetNeonType(getLLVMContext(), type & 0x7, quad);
   1374   llvm::Type *Ty = VTy;
   1375   if (!Ty)
   1376     return 0;
   1377 
   1378   unsigned Int;
   1379   switch (BuiltinID) {
   1380   default: return 0;
   1381   case ARM::BI__builtin_neon_vabd_v:
   1382   case ARM::BI__builtin_neon_vabdq_v:
   1383     Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
   1384     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
   1385   case ARM::BI__builtin_neon_vabs_v:
   1386   case ARM::BI__builtin_neon_vabsq_v:
   1387     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
   1388                         Ops, "vabs");
   1389   case ARM::BI__builtin_neon_vaddhn_v:
   1390     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
   1391                         Ops, "vaddhn");
   1392   case ARM::BI__builtin_neon_vcale_v:
   1393     std::swap(Ops[0], Ops[1]);
   1394   case ARM::BI__builtin_neon_vcage_v: {
   1395     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged);
   1396     return EmitNeonCall(F, Ops, "vcage");
   1397   }
   1398   case ARM::BI__builtin_neon_vcaleq_v:
   1399     std::swap(Ops[0], Ops[1]);
   1400   case ARM::BI__builtin_neon_vcageq_v: {
   1401     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq);
   1402     return EmitNeonCall(F, Ops, "vcage");
   1403   }
   1404   case ARM::BI__builtin_neon_vcalt_v:
   1405     std::swap(Ops[0], Ops[1]);
   1406   case ARM::BI__builtin_neon_vcagt_v: {
   1407     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd);
   1408     return EmitNeonCall(F, Ops, "vcagt");
   1409   }
   1410   case ARM::BI__builtin_neon_vcaltq_v:
   1411     std::swap(Ops[0], Ops[1]);
   1412   case ARM::BI__builtin_neon_vcagtq_v: {
   1413     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq);
   1414     return EmitNeonCall(F, Ops, "vcagt");
   1415   }
   1416   case ARM::BI__builtin_neon_vcls_v:
   1417   case ARM::BI__builtin_neon_vclsq_v: {
   1418     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, Ty);
   1419     return EmitNeonCall(F, Ops, "vcls");
   1420   }
   1421   case ARM::BI__builtin_neon_vclz_v:
   1422   case ARM::BI__builtin_neon_vclzq_v: {
   1423     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, Ty);
   1424     return EmitNeonCall(F, Ops, "vclz");
   1425   }
   1426   case ARM::BI__builtin_neon_vcnt_v:
   1427   case ARM::BI__builtin_neon_vcntq_v: {
   1428     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, Ty);
   1429     return EmitNeonCall(F, Ops, "vcnt");
   1430   }
   1431   case ARM::BI__builtin_neon_vcvt_f16_v: {
   1432     assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f16_v builtin");
   1433     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvtfp2hf);
   1434     return EmitNeonCall(F, Ops, "vcvt");
   1435   }
   1436   case ARM::BI__builtin_neon_vcvt_f32_f16: {
   1437     assert((type & 0x7) == 7 && !quad && "unexpected vcvt_f32_f16 builtin");
   1438     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcvthf2fp);
   1439     return EmitNeonCall(F, Ops, "vcvt");
   1440   }
   1441   case ARM::BI__builtin_neon_vcvt_f32_v:
   1442   case ARM::BI__builtin_neon_vcvtq_f32_v: {
   1443     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1444     Ty = GetNeonType(getLLVMContext(), 4, quad);
   1445     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
   1446                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   1447   }
   1448   case ARM::BI__builtin_neon_vcvt_s32_v:
   1449   case ARM::BI__builtin_neon_vcvt_u32_v:
   1450   case ARM::BI__builtin_neon_vcvtq_s32_v:
   1451   case ARM::BI__builtin_neon_vcvtq_u32_v: {
   1452     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(getLLVMContext(), 4, quad));
   1453     return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
   1454                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   1455   }
   1456   case ARM::BI__builtin_neon_vcvt_n_f32_v:
   1457   case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
   1458     llvm::Type *Tys[2] = { GetNeonType(getLLVMContext(), 4, quad), Ty };
   1459     Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
   1460     Function *F = CGM.getIntrinsic(Int, Tys);
   1461     return EmitNeonCall(F, Ops, "vcvt_n");
   1462   }
   1463   case ARM::BI__builtin_neon_vcvt_n_s32_v:
   1464   case ARM::BI__builtin_neon_vcvt_n_u32_v:
   1465   case ARM::BI__builtin_neon_vcvtq_n_s32_v:
   1466   case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
   1467     llvm::Type *Tys[2] = { Ty, GetNeonType(getLLVMContext(), 4, quad) };
   1468     Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
   1469     Function *F = CGM.getIntrinsic(Int, Tys);
   1470     return EmitNeonCall(F, Ops, "vcvt_n");
   1471   }
   1472   case ARM::BI__builtin_neon_vext_v:
   1473   case ARM::BI__builtin_neon_vextq_v: {
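               // E.g. with CV=1 on <2 x i64> the mask built below is <1, 2>: a
               // window into the concatenation of the two inputs starting at
               // element 1.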
   1474     int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
   1475     SmallVector<Constant*, 16> Indices;
   1476     for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
   1477       Indices.push_back(ConstantInt::get(Int32Ty, i+CV));
   1478 
   1479     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1480     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1481     Value *SV = llvm::ConstantVector::get(Indices);
   1482     return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
   1483   }
   1484   case ARM::BI__builtin_neon_vhadd_v:
   1485   case ARM::BI__builtin_neon_vhaddq_v:
   1486     Int = usgn ? Intrinsic::arm_neon_vhaddu : Intrinsic::arm_neon_vhadds;
   1487     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhadd");
   1488   case ARM::BI__builtin_neon_vhsub_v:
   1489   case ARM::BI__builtin_neon_vhsubq_v:
   1490     Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
   1491     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vhsub");
   1492   case ARM::BI__builtin_neon_vld1_v:
   1493   case ARM::BI__builtin_neon_vld1q_v:
   1494     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1495     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
   1496                         Ops, "vld1");
   1497   case ARM::BI__builtin_neon_vld1_lane_v:
   1498   case ARM::BI__builtin_neon_vld1q_lane_v:
   1499     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1500     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
   1501     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1502     Ops[0] = Builder.CreateLoad(Ops[0]);
   1503     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
   1504   case ARM::BI__builtin_neon_vld1_dup_v:
   1505   case ARM::BI__builtin_neon_vld1q_dup_v: {
   1506     Value *V = UndefValue::get(Ty);
   1507     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
   1508     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1509     Ops[0] = Builder.CreateLoad(Ops[0]);
   1510     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
   1511     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
   1512     return EmitNeonSplat(Ops[0], CI);
   1513   }
   1514   case ARM::BI__builtin_neon_vld2_v:
   1515   case ARM::BI__builtin_neon_vld2q_v: {
   1516     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, Ty);
   1517     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
   1518     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld2");
   1519     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1520     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1521     return Builder.CreateStore(Ops[1], Ops[0]);
   1522   }
   1523   case ARM::BI__builtin_neon_vld3_v:
   1524   case ARM::BI__builtin_neon_vld3q_v: {
   1525     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, Ty);
   1526     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
   1527     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld3");
   1528     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1529     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1530     return Builder.CreateStore(Ops[1], Ops[0]);
   1531   }
   1532   case ARM::BI__builtin_neon_vld4_v:
   1533   case ARM::BI__builtin_neon_vld4q_v: {
   1534     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, Ty);
   1535     Value *Align = GetPointeeAlignment(*this, E->getArg(1));
   1536     Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld4");
   1537     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1538     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1539     return Builder.CreateStore(Ops[1], Ops[0]);
   1540   }
   1541   case ARM::BI__builtin_neon_vld2_lane_v:
   1542   case ARM::BI__builtin_neon_vld2q_lane_v: {
   1543     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, Ty);
   1544     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1545     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1546     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
   1547     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
   1548     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1549     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1550     return Builder.CreateStore(Ops[1], Ops[0]);
   1551   }
   1552   case ARM::BI__builtin_neon_vld3_lane_v:
   1553   case ARM::BI__builtin_neon_vld3q_lane_v: {
   1554     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, Ty);
   1555     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1556     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1557     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
   1558     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
   1559     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
   1560     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1561     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1562     return Builder.CreateStore(Ops[1], Ops[0]);
   1563   }
   1564   case ARM::BI__builtin_neon_vld4_lane_v:
   1565   case ARM::BI__builtin_neon_vld4q_lane_v: {
   1566     Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, Ty);
   1567     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1568     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
   1569     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
   1570     Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
   1571     Ops.push_back(GetPointeeAlignment(*this, E->getArg(1)));
    1572     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
   1573     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1574     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1575     return Builder.CreateStore(Ops[1], Ops[0]);
   1576   }
   1577   case ARM::BI__builtin_neon_vld2_dup_v:
   1578   case ARM::BI__builtin_neon_vld3_dup_v:
   1579   case ARM::BI__builtin_neon_vld4_dup_v: {
    1580     // Handle 64-bit elements as a special case.  No "dup" is needed.
   1581     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
   1582       switch (BuiltinID) {
   1583       case ARM::BI__builtin_neon_vld2_dup_v:
   1584         Int = Intrinsic::arm_neon_vld2;
   1585         break;
   1586       case ARM::BI__builtin_neon_vld3_dup_v:
    1587         Int = Intrinsic::arm_neon_vld3;
   1588         break;
   1589       case ARM::BI__builtin_neon_vld4_dup_v:
    1590         Int = Intrinsic::arm_neon_vld4;
   1591         break;
   1592       default: llvm_unreachable("unknown vld_dup intrinsic?");
   1593       }
   1594       Function *F = CGM.getIntrinsic(Int, Ty);
   1595       Value *Align = GetPointeeAlignment(*this, E->getArg(1));
   1596       Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
   1597       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1598       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1599       return Builder.CreateStore(Ops[1], Ops[0]);
   1600     }
   1601     switch (BuiltinID) {
   1602     case ARM::BI__builtin_neon_vld2_dup_v:
   1603       Int = Intrinsic::arm_neon_vld2lane;
   1604       break;
   1605     case ARM::BI__builtin_neon_vld3_dup_v:
    1606       Int = Intrinsic::arm_neon_vld3lane;
   1607       break;
   1608     case ARM::BI__builtin_neon_vld4_dup_v:
    1609       Int = Intrinsic::arm_neon_vld4lane;
   1610       break;
   1611     default: llvm_unreachable("unknown vld_dup intrinsic?");
   1612     }
   1613     Function *F = CGM.getIntrinsic(Int, Ty);
   1614     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
   1615 
   1616     SmallVector<Value*, 6> Args;
   1617     Args.push_back(Ops[1]);
   1618     Args.append(STy->getNumElements(), UndefValue::get(Ty));
   1619 
   1620     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
   1621     Args.push_back(CI);
   1622     Args.push_back(GetPointeeAlignment(*this, E->getArg(1)));
   1623 
   1624     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    1625     // Splat lane 0 to all elements in each vector of the result.
   1626     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
   1627       Value *Val = Builder.CreateExtractValue(Ops[1], i);
   1628       Value *Elt = Builder.CreateBitCast(Val, Ty);
   1629       Elt = EmitNeonSplat(Elt, CI);
   1630       Elt = Builder.CreateBitCast(Elt, Val->getType());
   1631       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
   1632     }
   1633     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1634     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1635     return Builder.CreateStore(Ops[1], Ops[0]);
   1636   }
   1637   case ARM::BI__builtin_neon_vmax_v:
   1638   case ARM::BI__builtin_neon_vmaxq_v:
   1639     Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
   1640     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
   1641   case ARM::BI__builtin_neon_vmin_v:
   1642   case ARM::BI__builtin_neon_vminq_v:
   1643     Int = usgn ? Intrinsic::arm_neon_vminu : Intrinsic::arm_neon_vmins;
   1644     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
   1645   case ARM::BI__builtin_neon_vmovl_v: {
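               // E.g. an unsigned vmovl widens <8 x i8> to <8 x i16> with a
               // zext; the narrow input type is recovered below from the
               // overloaded result type.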
   1646     llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
   1647     Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
   1648     if (usgn)
   1649       return Builder.CreateZExt(Ops[0], Ty, "vmovl");
   1650     return Builder.CreateSExt(Ops[0], Ty, "vmovl");
   1651   }
   1652   case ARM::BI__builtin_neon_vmovn_v: {
   1653     llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
   1654     Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
   1655     return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
   1656   }
   1657   case ARM::BI__builtin_neon_vmul_v:
   1658   case ARM::BI__builtin_neon_vmulq_v:
   1659     assert(poly && "vmul builtin only supported for polynomial types");
   1660     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vmulp, Ty),
   1661                         Ops, "vmul");
   1662   case ARM::BI__builtin_neon_vmull_v:
   1663     Int = usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
   1664     Int = poly ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
   1665     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
   1666   case ARM::BI__builtin_neon_vpadal_v:
   1667   case ARM::BI__builtin_neon_vpadalq_v: {
   1668     Int = usgn ? Intrinsic::arm_neon_vpadalu : Intrinsic::arm_neon_vpadals;
   1669     // The source operand type has twice as many elements of half the size.
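               // E.g. for a <4 x i32> result the narrow source type computed
               // below is <8 x i16>.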
   1670     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
   1671     llvm::Type *EltTy =
   1672       llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
   1673     llvm::Type *NarrowTy =
   1674       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
   1675     llvm::Type *Tys[2] = { Ty, NarrowTy };
   1676     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpadal");
   1677   }
   1678   case ARM::BI__builtin_neon_vpadd_v:
   1679     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vpadd, Ty),
   1680                         Ops, "vpadd");
   1681   case ARM::BI__builtin_neon_vpaddl_v:
   1682   case ARM::BI__builtin_neon_vpaddlq_v: {
   1683     Int = usgn ? Intrinsic::arm_neon_vpaddlu : Intrinsic::arm_neon_vpaddls;
   1684     // The source operand type has twice as many elements of half the size.
   1685     unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
   1686     llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
   1687     llvm::Type *NarrowTy =
   1688       llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
   1689     llvm::Type *Tys[2] = { Ty, NarrowTy };
   1690     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
   1691   }
   1692   case ARM::BI__builtin_neon_vpmax_v:
   1693     Int = usgn ? Intrinsic::arm_neon_vpmaxu : Intrinsic::arm_neon_vpmaxs;
   1694     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
   1695   case ARM::BI__builtin_neon_vpmin_v:
   1696     Int = usgn ? Intrinsic::arm_neon_vpminu : Intrinsic::arm_neon_vpmins;
   1697     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
   1698   case ARM::BI__builtin_neon_vqabs_v:
   1699   case ARM::BI__builtin_neon_vqabsq_v:
   1700     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqabs, Ty),
   1701                         Ops, "vqabs");
   1702   case ARM::BI__builtin_neon_vqadd_v:
   1703   case ARM::BI__builtin_neon_vqaddq_v:
   1704     Int = usgn ? Intrinsic::arm_neon_vqaddu : Intrinsic::arm_neon_vqadds;
   1705     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqadd");
   1706   case ARM::BI__builtin_neon_vqdmlal_v:
   1707     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlal, Ty),
   1708                         Ops, "vqdmlal");
   1709   case ARM::BI__builtin_neon_vqdmlsl_v:
   1710     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmlsl, Ty),
   1711                         Ops, "vqdmlsl");
   1712   case ARM::BI__builtin_neon_vqdmulh_v:
   1713   case ARM::BI__builtin_neon_vqdmulhq_v:
   1714     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmulh, Ty),
   1715                         Ops, "vqdmulh");
   1716   case ARM::BI__builtin_neon_vqdmull_v:
   1717     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, Ty),
   1718                         Ops, "vqdmull");
   1719   case ARM::BI__builtin_neon_vqmovn_v:
   1720     Int = usgn ? Intrinsic::arm_neon_vqmovnu : Intrinsic::arm_neon_vqmovns;
   1721     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqmovn");
   1722   case ARM::BI__builtin_neon_vqmovun_v:
   1723     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqmovnsu, Ty),
    1724                         Ops, "vqmovun");
   1725   case ARM::BI__builtin_neon_vqneg_v:
   1726   case ARM::BI__builtin_neon_vqnegq_v:
   1727     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqneg, Ty),
   1728                         Ops, "vqneg");
   1729   case ARM::BI__builtin_neon_vqrdmulh_v:
   1730   case ARM::BI__builtin_neon_vqrdmulhq_v:
   1731     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrdmulh, Ty),
   1732                         Ops, "vqrdmulh");
   1733   case ARM::BI__builtin_neon_vqrshl_v:
   1734   case ARM::BI__builtin_neon_vqrshlq_v:
   1735     Int = usgn ? Intrinsic::arm_neon_vqrshiftu : Intrinsic::arm_neon_vqrshifts;
   1736     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshl");
   1737   case ARM::BI__builtin_neon_vqrshrn_n_v:
   1738     Int = usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
   1739     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
   1740                         1, true);
   1741   case ARM::BI__builtin_neon_vqrshrun_n_v:
   1742     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
   1743                         Ops, "vqrshrun_n", 1, true);
   1744   case ARM::BI__builtin_neon_vqshl_v:
   1745   case ARM::BI__builtin_neon_vqshlq_v:
   1746     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
   1747     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl");
   1748   case ARM::BI__builtin_neon_vqshl_n_v:
   1749   case ARM::BI__builtin_neon_vqshlq_n_v:
   1750     Int = usgn ? Intrinsic::arm_neon_vqshiftu : Intrinsic::arm_neon_vqshifts;
   1751     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
   1752                         1, false);
   1753   case ARM::BI__builtin_neon_vqshlu_n_v:
   1754   case ARM::BI__builtin_neon_vqshluq_n_v:
   1755     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftsu, Ty),
   1756                         Ops, "vqshlu", 1, false);
   1757   case ARM::BI__builtin_neon_vqshrn_n_v:
   1758     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
   1759     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
   1760                         1, true);
   1761   case ARM::BI__builtin_neon_vqshrun_n_v:
   1762     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
   1763                         Ops, "vqshrun_n", 1, true);
   1764   case ARM::BI__builtin_neon_vqsub_v:
   1765   case ARM::BI__builtin_neon_vqsubq_v:
   1766     Int = usgn ? Intrinsic::arm_neon_vqsubu : Intrinsic::arm_neon_vqsubs;
   1767     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqsub");
   1768   case ARM::BI__builtin_neon_vraddhn_v:
   1769     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vraddhn, Ty),
   1770                         Ops, "vraddhn");
   1771   case ARM::BI__builtin_neon_vrecpe_v:
   1772   case ARM::BI__builtin_neon_vrecpeq_v:
   1773     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
   1774                         Ops, "vrecpe");
   1775   case ARM::BI__builtin_neon_vrecps_v:
   1776   case ARM::BI__builtin_neon_vrecpsq_v:
   1777     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecps, Ty),
   1778                         Ops, "vrecps");
   1779   case ARM::BI__builtin_neon_vrhadd_v:
   1780   case ARM::BI__builtin_neon_vrhaddq_v:
   1781     Int = usgn ? Intrinsic::arm_neon_vrhaddu : Intrinsic::arm_neon_vrhadds;
   1782     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrhadd");
   1783   case ARM::BI__builtin_neon_vrshl_v:
   1784   case ARM::BI__builtin_neon_vrshlq_v:
   1785     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   1786     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshl");
   1787   case ARM::BI__builtin_neon_vrshrn_n_v:
   1788     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
   1789                         Ops, "vrshrn_n", 1, true);
   1790   case ARM::BI__builtin_neon_vrshr_n_v:
   1791   case ARM::BI__builtin_neon_vrshrq_n_v:
   1792     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   1793     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n", 1, true);
   1794   case ARM::BI__builtin_neon_vrsqrte_v:
   1795   case ARM::BI__builtin_neon_vrsqrteq_v:
   1796     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrte, Ty),
   1797                         Ops, "vrsqrte");
   1798   case ARM::BI__builtin_neon_vrsqrts_v:
   1799   case ARM::BI__builtin_neon_vrsqrtsq_v:
   1800     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsqrts, Ty),
   1801                         Ops, "vrsqrts");
   1802   case ARM::BI__builtin_neon_vrsra_n_v:
   1803   case ARM::BI__builtin_neon_vrsraq_n_v:
   1804     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1805     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1806     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
   1807     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
   1808     Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
   1809     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
   1810   case ARM::BI__builtin_neon_vrsubhn_v:
   1811     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrsubhn, Ty),
   1812                         Ops, "vrsubhn");
   1813   case ARM::BI__builtin_neon_vshl_v:
   1814   case ARM::BI__builtin_neon_vshlq_v:
   1815     Int = usgn ? Intrinsic::arm_neon_vshiftu : Intrinsic::arm_neon_vshifts;
   1816     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshl");
   1817   case ARM::BI__builtin_neon_vshll_n_v:
   1818     Int = usgn ? Intrinsic::arm_neon_vshiftlu : Intrinsic::arm_neon_vshiftls;
   1819     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vshll", 1);
   1820   case ARM::BI__builtin_neon_vshl_n_v:
   1821   case ARM::BI__builtin_neon_vshlq_n_v:
   1822     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
   1823     return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1], "vshl_n");
   1824   case ARM::BI__builtin_neon_vshrn_n_v:
   1825     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftn, Ty),
   1826                         Ops, "vshrn_n", 1, true);
   1827   case ARM::BI__builtin_neon_vshr_n_v:
   1828   case ARM::BI__builtin_neon_vshrq_n_v:
   1829     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1830     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
   1831     if (usgn)
   1832       return Builder.CreateLShr(Ops[0], Ops[1], "vshr_n");
   1833     else
   1834       return Builder.CreateAShr(Ops[0], Ops[1], "vshr_n");
   1835   case ARM::BI__builtin_neon_vsri_n_v:
   1836   case ARM::BI__builtin_neon_vsriq_n_v:
   1837     rightShift = true;
   1838   case ARM::BI__builtin_neon_vsli_n_v:
   1839   case ARM::BI__builtin_neon_vsliq_n_v:
   1840     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
   1841     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
   1842                         Ops, "vsli_n");
   1843   case ARM::BI__builtin_neon_vsra_n_v:
   1844   case ARM::BI__builtin_neon_vsraq_n_v:
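               // Sketch: vsra_n is open-coded as shift-then-accumulate, e.g.
               //   %sh = ashr <4 x i32> %b, <i32 n, ...>   ; lshr if unsigned
               //   %r  = add <4 x i32> %a, %sh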
   1845     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1846     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1847     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, false);
   1848     if (usgn)
   1849       Ops[1] = Builder.CreateLShr(Ops[1], Ops[2], "vsra_n");
   1850     else
   1851       Ops[1] = Builder.CreateAShr(Ops[1], Ops[2], "vsra_n");
   1852     return Builder.CreateAdd(Ops[0], Ops[1]);
   1853   case ARM::BI__builtin_neon_vst1_v:
   1854   case ARM::BI__builtin_neon_vst1q_v:
   1855     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1856     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
   1857                         Ops, "");
   1858   case ARM::BI__builtin_neon_vst1_lane_v:
   1859   case ARM::BI__builtin_neon_vst1q_lane_v:
   1860     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1861     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
   1862     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
   1863     return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
   1864   case ARM::BI__builtin_neon_vst2_v:
   1865   case ARM::BI__builtin_neon_vst2q_v:
   1866     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1867     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2, Ty),
   1868                         Ops, "");
   1869   case ARM::BI__builtin_neon_vst2_lane_v:
   1870   case ARM::BI__builtin_neon_vst2q_lane_v:
   1871     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1872     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst2lane, Ty),
   1873                         Ops, "");
   1874   case ARM::BI__builtin_neon_vst3_v:
   1875   case ARM::BI__builtin_neon_vst3q_v:
   1876     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1877     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3, Ty),
   1878                         Ops, "");
   1879   case ARM::BI__builtin_neon_vst3_lane_v:
   1880   case ARM::BI__builtin_neon_vst3q_lane_v:
   1881     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1882     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst3lane, Ty),
   1883                         Ops, "");
   1884   case ARM::BI__builtin_neon_vst4_v:
   1885   case ARM::BI__builtin_neon_vst4q_v:
   1886     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1887     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4, Ty),
   1888                         Ops, "");
   1889   case ARM::BI__builtin_neon_vst4_lane_v:
   1890   case ARM::BI__builtin_neon_vst4q_lane_v:
   1891     Ops.push_back(GetPointeeAlignment(*this, E->getArg(0)));
   1892     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
   1893                         Ops, "");
   1894   case ARM::BI__builtin_neon_vsubhn_v:
   1895     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
   1896                         Ops, "vsubhn");
   1897   case ARM::BI__builtin_neon_vtbl1_v:
   1898     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
   1899                         Ops, "vtbl1");
   1900   case ARM::BI__builtin_neon_vtbl2_v:
   1901     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
   1902                         Ops, "vtbl2");
   1903   case ARM::BI__builtin_neon_vtbl3_v:
   1904     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
   1905                         Ops, "vtbl3");
   1906   case ARM::BI__builtin_neon_vtbl4_v:
   1907     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
   1908                         Ops, "vtbl4");
   1909   case ARM::BI__builtin_neon_vtbx1_v:
   1910     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
   1911                         Ops, "vtbx1");
   1912   case ARM::BI__builtin_neon_vtbx2_v:
   1913     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
   1914                         Ops, "vtbx2");
   1915   case ARM::BI__builtin_neon_vtbx3_v:
   1916     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
   1917                         Ops, "vtbx3");
   1918   case ARM::BI__builtin_neon_vtbx4_v:
   1919     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
   1920                         Ops, "vtbx4");
   1921   case ARM::BI__builtin_neon_vtst_v:
   1922   case ARM::BI__builtin_neon_vtstq_v: {
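               // Sketch of the expansion below for <8 x i8>:
               //   %and = and <8 x i8> %a, %b
               //   %cmp = icmp ne <8 x i8> %and, zeroinitializer
               //   %r   = sext <8 x i1> %cmp to <8 x i8>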
   1923     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
   1924     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1925     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
   1926     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
   1927                                 ConstantAggregateZero::get(Ty));
   1928     return Builder.CreateSExt(Ops[0], Ty, "vtst");
   1929   }
   1930   case ARM::BI__builtin_neon_vtrn_v:
   1931   case ARM::BI__builtin_neon_vtrnq_v: {
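               // E.g. for <4 x i16> the two stored shuffles use the masks
               // <0, 4, 2, 6> and <1, 5, 3, 7>, transposing 2x2 element blocks
               // of the two inputs.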
   1932     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
   1933     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1934     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1935     Value *SV = 0;
   1936 
   1937     for (unsigned vi = 0; vi != 2; ++vi) {
   1938       SmallVector<Constant*, 16> Indices;
   1939       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
   1940         Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
   1941         Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
   1942       }
   1943       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
   1944       SV = llvm::ConstantVector::get(Indices);
   1945       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
   1946       SV = Builder.CreateStore(SV, Addr);
   1947     }
   1948     return SV;
   1949   }
   1950   case ARM::BI__builtin_neon_vuzp_v:
   1951   case ARM::BI__builtin_neon_vuzpq_v: {
   1952     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
   1953     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1954     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1955     Value *SV = 0;
   1956 
   1957     for (unsigned vi = 0; vi != 2; ++vi) {
   1958       SmallVector<Constant*, 16> Indices;
   1959       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
   1960         Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
   1961 
   1962       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
   1963       SV = llvm::ConstantVector::get(Indices);
   1964       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
   1965       SV = Builder.CreateStore(SV, Addr);
   1966     }
   1967     return SV;
   1968   }
   1969   case ARM::BI__builtin_neon_vzip_v:
   1970   case ARM::BI__builtin_neon_vzipq_v: {
   1971     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
   1972     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
   1973     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
   1974     Value *SV = 0;
   1975 
   1976     for (unsigned vi = 0; vi != 2; ++vi) {
   1977       SmallVector<Constant*, 16> Indices;
   1978       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
   1979         Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
   1980         Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
   1981       }
   1982       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
   1983       SV = llvm::ConstantVector::get(Indices);
   1984       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
   1985       SV = Builder.CreateStore(SV, Addr);
   1986     }
   1987     return SV;
   1988   }
   1989   }
   1990 }
   1991 
   1992 llvm::Value *CodeGenFunction::
   1993 BuildVector(const SmallVectorImpl<llvm::Value*> &Ops) {
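             // E.g. BuildVector({i32 1, i32 2, i32 3, i32 4}) folds to the
             // constant <4 x i32> <i32 1, i32 2, i32 3, i32 4>; any
             // non-constant operand forces the insertelement chain below.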
   1994   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
   1995          "Not a power-of-two sized vector!");
   1996   bool AllConstants = true;
   1997   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
   1998     AllConstants &= isa<Constant>(Ops[i]);
   1999 
   2000   // If this is a constant vector, create a ConstantVector.
   2001   if (AllConstants) {
   2002     std::vector<llvm::Constant*> CstOps;
   2003     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2004       CstOps.push_back(cast<Constant>(Ops[i]));
   2005     return llvm::ConstantVector::get(CstOps);
   2006   }
   2007 
   2008   // Otherwise, insertelement the values to build the vector.
   2009   Value *Result =
   2010     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
   2011 
   2012   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
   2013     Result = Builder.CreateInsertElement(Result, Ops[i],
   2014                llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), i));
   2015 
   2016   return Result;
   2017 }
   2018 
   2019 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   2020                                            const CallExpr *E) {
   2021   SmallVector<Value*, 4> Ops;
   2022 
   2023   // Find out if any arguments are required to be integer constant expressions.
   2024   unsigned ICEArguments = 0;
   2025   ASTContext::GetBuiltinTypeError Error;
   2026   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
   2027   assert(Error == ASTContext::GE_None && "Should not codegen an error");
   2028 
   2029   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
   2030     // If this is a normal argument, just emit it as a scalar.
   2031     if ((ICEArguments & (1 << i)) == 0) {
   2032       Ops.push_back(EmitScalarExpr(E->getArg(i)));
   2033       continue;
   2034     }
   2035 
   2036     // If this is required to be a constant, constant fold it so that we know
   2037     // that the generated intrinsic gets a ConstantInt.
   2038     llvm::APSInt Result;
   2039     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
   2040     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
   2041     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
   2042   }
   2043 
   2044   switch (BuiltinID) {
   2045   default: return 0;
   2046   case X86::BI__builtin_ia32_pslldi128:
   2047   case X86::BI__builtin_ia32_psllqi128:
   2048   case X86::BI__builtin_ia32_psllwi128:
   2049   case X86::BI__builtin_ia32_psradi128:
   2050   case X86::BI__builtin_ia32_psrawi128:
   2051   case X86::BI__builtin_ia32_psrldi128:
   2052   case X86::BI__builtin_ia32_psrlqi128:
   2053   case X86::BI__builtin_ia32_psrlwi128: {
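               // Sketch for pslldi128: the scalar count is widened to i64,
               // placed in lane 0 of a v2i64, bitcast to the operand type, and
               // passed to the intrinsic, roughly
               //   %r = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a,
               //                                             <4 x i32> %cnt)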
   2054     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
   2055     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
   2056     llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
   2057     Ops[1] = Builder.CreateInsertElement(llvm::UndefValue::get(Ty),
   2058                                          Ops[1], Zero, "insert");
   2059     Ops[1] = Builder.CreateBitCast(Ops[1], Ops[0]->getType(), "bitcast");
   2060     const char *name = 0;
   2061     Intrinsic::ID ID = Intrinsic::not_intrinsic;
   2062 
   2063     switch (BuiltinID) {
   2064     default: llvm_unreachable("Unsupported shift intrinsic!");
   2065     case X86::BI__builtin_ia32_pslldi128:
   2066       name = "pslldi";
   2067       ID = Intrinsic::x86_sse2_psll_d;
   2068       break;
   2069     case X86::BI__builtin_ia32_psllqi128:
   2070       name = "psllqi";
   2071       ID = Intrinsic::x86_sse2_psll_q;
   2072       break;
   2073     case X86::BI__builtin_ia32_psllwi128:
   2074       name = "psllwi";
   2075       ID = Intrinsic::x86_sse2_psll_w;
   2076       break;
   2077     case X86::BI__builtin_ia32_psradi128:
   2078       name = "psradi";
   2079       ID = Intrinsic::x86_sse2_psra_d;
   2080       break;
   2081     case X86::BI__builtin_ia32_psrawi128:
   2082       name = "psrawi";
   2083       ID = Intrinsic::x86_sse2_psra_w;
   2084       break;
   2085     case X86::BI__builtin_ia32_psrldi128:
   2086       name = "psrldi";
   2087       ID = Intrinsic::x86_sse2_psrl_d;
   2088       break;
   2089     case X86::BI__builtin_ia32_psrlqi128:
   2090       name = "psrlqi";
   2091       ID = Intrinsic::x86_sse2_psrl_q;
   2092       break;
   2093     case X86::BI__builtin_ia32_psrlwi128:
   2094       name = "psrlwi";
   2095       ID = Intrinsic::x86_sse2_psrl_w;
   2096       break;
   2097     }
   2098     llvm::Function *F = CGM.getIntrinsic(ID);
   2099     return Builder.CreateCall(F, Ops, name);
   2100   }
   2101   case X86::BI__builtin_ia32_vec_init_v8qi:
   2102   case X86::BI__builtin_ia32_vec_init_v4hi:
   2103   case X86::BI__builtin_ia32_vec_init_v2si:
   2104     return Builder.CreateBitCast(BuildVector(Ops),
   2105                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
   2106   case X86::BI__builtin_ia32_vec_ext_v2si:
   2107     return Builder.CreateExtractElement(Ops[0],
   2108                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
   2109   case X86::BI__builtin_ia32_pslldi:
   2110   case X86::BI__builtin_ia32_psllqi:
   2111   case X86::BI__builtin_ia32_psllwi:
   2112   case X86::BI__builtin_ia32_psradi:
   2113   case X86::BI__builtin_ia32_psrawi:
   2114   case X86::BI__builtin_ia32_psrldi:
   2115   case X86::BI__builtin_ia32_psrlqi:
   2116   case X86::BI__builtin_ia32_psrlwi: {
   2117     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty, "zext");
   2118     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 1);
   2119     Ops[1] = Builder.CreateBitCast(Ops[1], Ty, "bitcast");
   2120     const char *name = 0;
   2121     Intrinsic::ID ID = Intrinsic::not_intrinsic;
   2122 
   2123     switch (BuiltinID) {
   2124     default: llvm_unreachable("Unsupported shift intrinsic!");
   2125     case X86::BI__builtin_ia32_pslldi:
   2126       name = "pslldi";
   2127       ID = Intrinsic::x86_mmx_psll_d;
   2128       break;
   2129     case X86::BI__builtin_ia32_psllqi:
   2130       name = "psllqi";
   2131       ID = Intrinsic::x86_mmx_psll_q;
   2132       break;
   2133     case X86::BI__builtin_ia32_psllwi:
   2134       name = "psllwi";
   2135       ID = Intrinsic::x86_mmx_psll_w;
   2136       break;
   2137     case X86::BI__builtin_ia32_psradi:
   2138       name = "psradi";
   2139       ID = Intrinsic::x86_mmx_psra_d;
   2140       break;
   2141     case X86::BI__builtin_ia32_psrawi:
   2142       name = "psrawi";
   2143       ID = Intrinsic::x86_mmx_psra_w;
   2144       break;
   2145     case X86::BI__builtin_ia32_psrldi:
   2146       name = "psrldi";
   2147       ID = Intrinsic::x86_mmx_psrl_d;
   2148       break;
   2149     case X86::BI__builtin_ia32_psrlqi:
   2150       name = "psrlqi";
   2151       ID = Intrinsic::x86_mmx_psrl_q;
   2152       break;
   2153     case X86::BI__builtin_ia32_psrlwi:
   2154       name = "psrlwi";
   2155       ID = Intrinsic::x86_mmx_psrl_w;
   2156       break;
   2157     }
   2158     llvm::Function *F = CGM.getIntrinsic(ID);
   2159     return Builder.CreateCall(F, Ops, name);
   2160   }
   2161   case X86::BI__builtin_ia32_cmpps: {
   2162     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ps);
   2163     return Builder.CreateCall(F, Ops, "cmpps");
   2164   }
   2165   case X86::BI__builtin_ia32_cmpss: {
   2166     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse_cmp_ss);
   2167     return Builder.CreateCall(F, Ops, "cmpss");
   2168   }
   2169   case X86::BI__builtin_ia32_ldmxcsr: {
   2170     llvm::Type *PtrTy = Int8PtrTy;
   2171     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
   2172     Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
   2173     Builder.CreateStore(Ops[0], Tmp);
   2174     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
   2175                               Builder.CreateBitCast(Tmp, PtrTy));
   2176   }
   2177   case X86::BI__builtin_ia32_stmxcsr: {
   2178     llvm::Type *PtrTy = Int8PtrTy;
   2179     Value *One = llvm::ConstantInt::get(Int32Ty, 1);
   2180     Value *Tmp = Builder.CreateAlloca(Int32Ty, One);
   2181     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
   2182                        Builder.CreateBitCast(Tmp, PtrTy));
   2183     return Builder.CreateLoad(Tmp, "stmxcsr");
   2184   }
   2185   case X86::BI__builtin_ia32_cmppd: {
   2186     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_pd);
   2187     return Builder.CreateCall(F, Ops, "cmppd");
   2188   }
   2189   case X86::BI__builtin_ia32_cmpsd: {
   2190     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_cmp_sd);
   2191     return Builder.CreateCall(F, Ops, "cmpsd");
   2192   }
   2193   case X86::BI__builtin_ia32_storehps:
   2194   case X86::BI__builtin_ia32_storelps: {
   2195     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
   2196     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
   2197 
    2198     // Cast the value to v2i64.
   2199     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
   2200 
    2201     // Extract element 0 (storelps) or 1 (storehps).
   2202     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
   2203     llvm::Value *Idx = llvm::ConstantInt::get(Int32Ty, Index);
   2204     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
   2205 
    2206     // Cast the pointer to i64* and store.
   2207     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
   2208     return Builder.CreateStore(Ops[1], Ops[0]);
   2209   }
   2210   case X86::BI__builtin_ia32_palignr: {
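               // E.g. a shift of 4 selects bytes 4..11 of the concatenated
               // pair: a shufflevector of Ops[1]:Ops[0] with mask <4, 5, ..., 11>.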
   2211     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
   2212 
   2213     // If palignr is shifting the pair of input vectors less than 9 bytes,
   2214     // emit a shuffle instruction.
   2215     if (shiftVal <= 8) {
   2216       SmallVector<llvm::Constant*, 8> Indices;
   2217       for (unsigned i = 0; i != 8; ++i)
   2218         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
   2219 
   2220       Value* SV = llvm::ConstantVector::get(Indices);
   2221       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
   2222     }
   2223 
   2224     // If palignr is shifting the pair of input vectors more than 8 but less
   2225     // than 16 bytes, emit a logical right shift of the destination.
   2226     if (shiftVal < 16) {
   2227       // MMX has these as 1 x i64 vectors for some odd optimization reasons.
   2228       llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 1);
   2229 
   2230       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
   2231       Ops[1] = llvm::ConstantInt::get(VecTy, (shiftVal-8) * 8);
   2232 
    2233       // Emit the shift through the MMX psrl.q intrinsic.
   2234       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_mmx_psrl_q);
   2235       return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
   2236     }
   2237 
   2238     // If palignr is shifting the pair of vectors more than 16 bytes, emit zero.
   2239     return llvm::Constant::getNullValue(ConvertType(E->getType()));
   2240   }
   2241   case X86::BI__builtin_ia32_palignr128: {
   2242     unsigned shiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
   2243 
   2244     // If palignr is shifting the pair of input vectors less than 17 bytes,
   2245     // emit a shuffle instruction.
   2246     if (shiftVal <= 16) {
   2247       SmallVector<llvm::Constant*, 16> Indices;
   2248       for (unsigned i = 0; i != 16; ++i)
   2249         Indices.push_back(llvm::ConstantInt::get(Int32Ty, shiftVal + i));
   2250 
   2251       Value* SV = llvm::ConstantVector::get(Indices);
   2252       return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
   2253     }
   2254 
   2255     // If palignr is shifting the pair of input vectors more than 16 but less
   2256     // than 32 bytes, emit a logical right shift of the destination.
   2257     if (shiftVal < 32) {
   2258       llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
   2259 
   2260       Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
   2261       Ops[1] = llvm::ConstantInt::get(Int32Ty, (shiftVal-16) * 8);
   2262 
    2263       // Emit the shift through the SSE2 psrl.dq intrinsic.
   2264       llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_sse2_psrl_dq);
   2265       return Builder.CreateCall(F, makeArrayRef(&Ops[0], 2), "palignr");
   2266     }
   2267 
   2268     // If palignr is shifting the pair of vectors more than 32 bytes, emit zero.
   2269     return llvm::Constant::getNullValue(ConvertType(E->getType()));
   2270   }
   2271   case X86::BI__builtin_ia32_movntps:
   2272   case X86::BI__builtin_ia32_movntpd:
   2273   case X86::BI__builtin_ia32_movntdq:
   2274   case X86::BI__builtin_ia32_movnti: {
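               // Sketch: the non-temporal hint becomes metadata on a normal
               // store, e.g.
               //   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0
               //   !0 = metadata !{i32 1}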
   2275     llvm::MDNode *Node = llvm::MDNode::get(getLLVMContext(),
   2276                                            Builder.getInt32(1));
   2277 
   2278     // Convert the type of the pointer to a pointer to the stored type.
   2279     Value *BC = Builder.CreateBitCast(Ops[0],
   2280                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
   2281                                       "cast");
   2282     StoreInst *SI = Builder.CreateStore(Ops[1], BC);
   2283     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
   2284     SI->setAlignment(16);
   2285     return SI;
   2286   }
   2287   // 3DNow!
   2288   case X86::BI__builtin_ia32_pavgusb:
   2289   case X86::BI__builtin_ia32_pf2id:
   2290   case X86::BI__builtin_ia32_pfacc:
   2291   case X86::BI__builtin_ia32_pfadd:
   2292   case X86::BI__builtin_ia32_pfcmpeq:
   2293   case X86::BI__builtin_ia32_pfcmpge:
   2294   case X86::BI__builtin_ia32_pfcmpgt:
   2295   case X86::BI__builtin_ia32_pfmax:
   2296   case X86::BI__builtin_ia32_pfmin:
   2297   case X86::BI__builtin_ia32_pfmul:
   2298   case X86::BI__builtin_ia32_pfrcp:
   2299   case X86::BI__builtin_ia32_pfrcpit1:
   2300   case X86::BI__builtin_ia32_pfrcpit2:
   2301   case X86::BI__builtin_ia32_pfrsqrt:
   2302   case X86::BI__builtin_ia32_pfrsqit1:
   2303   case X86::BI__builtin_ia32_pfrsqrtit1:
   2304   case X86::BI__builtin_ia32_pfsub:
   2305   case X86::BI__builtin_ia32_pfsubr:
   2306   case X86::BI__builtin_ia32_pi2fd:
   2307   case X86::BI__builtin_ia32_pmulhrw:
   2308   case X86::BI__builtin_ia32_pf2iw:
   2309   case X86::BI__builtin_ia32_pfnacc:
   2310   case X86::BI__builtin_ia32_pfpnacc:
   2311   case X86::BI__builtin_ia32_pi2fw:
   2312   case X86::BI__builtin_ia32_pswapdsf:
   2313   case X86::BI__builtin_ia32_pswapdsi: {
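             // Each 3DNow! builtin maps one-to-one onto an LLVM intrinsic with
             // its operands passed through unchanged, so a single
             // name/intrinsic-ID table suffices. The x86_3dnowa_* entries
             // (pf2iw, pfnacc, pfpnacc, pi2fw, pswapd) belong to the enhanced
             // (Athlon) 3DNow! extension set.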
   2314     const char *name = 0;
   2315     Intrinsic::ID ID = Intrinsic::not_intrinsic;
    2316     switch (BuiltinID) {
   2317     case X86::BI__builtin_ia32_pavgusb:
   2318       name = "pavgusb";
   2319       ID = Intrinsic::x86_3dnow_pavgusb;
   2320       break;
   2321     case X86::BI__builtin_ia32_pf2id:
   2322       name = "pf2id";
   2323       ID = Intrinsic::x86_3dnow_pf2id;
   2324       break;
   2325     case X86::BI__builtin_ia32_pfacc:
   2326       name = "pfacc";
   2327       ID = Intrinsic::x86_3dnow_pfacc;
   2328       break;
   2329     case X86::BI__builtin_ia32_pfadd:
   2330       name = "pfadd";
   2331       ID = Intrinsic::x86_3dnow_pfadd;
   2332       break;
   2333     case X86::BI__builtin_ia32_pfcmpeq:
   2334       name = "pfcmpeq";
   2335       ID = Intrinsic::x86_3dnow_pfcmpeq;
   2336       break;
   2337     case X86::BI__builtin_ia32_pfcmpge:
   2338       name = "pfcmpge";
   2339       ID = Intrinsic::x86_3dnow_pfcmpge;
   2340       break;
   2341     case X86::BI__builtin_ia32_pfcmpgt:
   2342       name = "pfcmpgt";
   2343       ID = Intrinsic::x86_3dnow_pfcmpgt;
   2344       break;
   2345     case X86::BI__builtin_ia32_pfmax:
   2346       name = "pfmax";
   2347       ID = Intrinsic::x86_3dnow_pfmax;
   2348       break;
   2349     case X86::BI__builtin_ia32_pfmin:
   2350       name = "pfmin";
   2351       ID = Intrinsic::x86_3dnow_pfmin;
   2352       break;
   2353     case X86::BI__builtin_ia32_pfmul:
   2354       name = "pfmul";
   2355       ID = Intrinsic::x86_3dnow_pfmul;
   2356       break;
   2357     case X86::BI__builtin_ia32_pfrcp:
   2358       name = "pfrcp";
   2359       ID = Intrinsic::x86_3dnow_pfrcp;
   2360       break;
   2361     case X86::BI__builtin_ia32_pfrcpit1:
   2362       name = "pfrcpit1";
   2363       ID = Intrinsic::x86_3dnow_pfrcpit1;
   2364       break;
   2365     case X86::BI__builtin_ia32_pfrcpit2:
   2366       name = "pfrcpit2";
   2367       ID = Intrinsic::x86_3dnow_pfrcpit2;
   2368       break;
   2369     case X86::BI__builtin_ia32_pfrsqrt:
   2370       name = "pfrsqrt";
   2371       ID = Intrinsic::x86_3dnow_pfrsqrt;
   2372       break;
   2373     case X86::BI__builtin_ia32_pfrsqit1:
   2374     case X86::BI__builtin_ia32_pfrsqrtit1:
   2375       name = "pfrsqit1";
   2376       ID = Intrinsic::x86_3dnow_pfrsqit1;
   2377       break;
   2378     case X86::BI__builtin_ia32_pfsub:
   2379       name = "pfsub";
   2380       ID = Intrinsic::x86_3dnow_pfsub;
   2381       break;
   2382     case X86::BI__builtin_ia32_pfsubr:
   2383       name = "pfsubr";
   2384       ID = Intrinsic::x86_3dnow_pfsubr;
   2385       break;
   2386     case X86::BI__builtin_ia32_pi2fd:
   2387       name = "pi2fd";
   2388       ID = Intrinsic::x86_3dnow_pi2fd;
   2389       break;
   2390     case X86::BI__builtin_ia32_pmulhrw:
   2391       name = "pmulhrw";
   2392       ID = Intrinsic::x86_3dnow_pmulhrw;
   2393       break;
   2394     case X86::BI__builtin_ia32_pf2iw:
   2395       name = "pf2iw";
   2396       ID = Intrinsic::x86_3dnowa_pf2iw;
   2397       break;
   2398     case X86::BI__builtin_ia32_pfnacc:
   2399       name = "pfnacc";
   2400       ID = Intrinsic::x86_3dnowa_pfnacc;
   2401       break;
   2402     case X86::BI__builtin_ia32_pfpnacc:
   2403       name = "pfpnacc";
   2404       ID = Intrinsic::x86_3dnowa_pfpnacc;
   2405       break;
   2406     case X86::BI__builtin_ia32_pi2fw:
   2407       name = "pi2fw";
   2408       ID = Intrinsic::x86_3dnowa_pi2fw;
   2409       break;
   2410     case X86::BI__builtin_ia32_pswapdsf:
   2411     case X86::BI__builtin_ia32_pswapdsi:
   2412       name = "pswapd";
   2413       ID = Intrinsic::x86_3dnowa_pswapd;
   2414       break;
   2415     }
   2416     llvm::Function *F = CGM.getIntrinsic(ID);
   2417     return Builder.CreateCall(F, Ops, name);
   2418   }
   2419   }
   2420 }
   2421 
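         /// EmitPPCBuiltinExpr - Emit a call to a PowerPC builtin. Only the
         /// AltiVec load/store builtins are special-cased here; any other
         /// builtin returns 0 and is left to the default handling.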
   2422 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
   2423                                            const CallExpr *E) {
   2424   SmallVector<Value*, 4> Ops;
   2425 
   2426   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
   2427     Ops.push_back(EmitScalarExpr(E->getArg(i)));
   2428 
   2429   Intrinsic::ID ID = Intrinsic::not_intrinsic;
   2430 
   2431   switch (BuiltinID) {
   2432   default: return 0;
   2433 
   2434   // vec_ld, vec_lvsl, vec_lvsr
   2435   case PPC::BI__builtin_altivec_lvx:
   2436   case PPC::BI__builtin_altivec_lvxl:
   2437   case PPC::BI__builtin_altivec_lvebx:
   2438   case PPC::BI__builtin_altivec_lvehx:
   2439   case PPC::BI__builtin_altivec_lvewx:
   2440   case PPC::BI__builtin_altivec_lvsl:
   2441   case PPC::BI__builtin_altivec_lvsr:
   2442   {
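             // These builtins take (offset, pointer) while the AltiVec
             // intrinsics take a single i8* address, so fold the offset into a
             // byte-level GEP and pop the now-redundant pointer operand.
             // Roughly: lvx(off, p) -> @llvm.ppc.altivec.lvx(i8* p + off).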
   2443     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
   2444 
   2445     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
   2446     Ops.pop_back();
   2447 
   2448     switch (BuiltinID) {
   2449     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
   2450     case PPC::BI__builtin_altivec_lvx:
   2451       ID = Intrinsic::ppc_altivec_lvx;
   2452       break;
   2453     case PPC::BI__builtin_altivec_lvxl:
   2454       ID = Intrinsic::ppc_altivec_lvxl;
   2455       break;
   2456     case PPC::BI__builtin_altivec_lvebx:
   2457       ID = Intrinsic::ppc_altivec_lvebx;
   2458       break;
   2459     case PPC::BI__builtin_altivec_lvehx:
   2460       ID = Intrinsic::ppc_altivec_lvehx;
   2461       break;
   2462     case PPC::BI__builtin_altivec_lvewx:
   2463       ID = Intrinsic::ppc_altivec_lvewx;
   2464       break;
   2465     case PPC::BI__builtin_altivec_lvsl:
   2466       ID = Intrinsic::ppc_altivec_lvsl;
   2467       break;
   2468     case PPC::BI__builtin_altivec_lvsr:
   2469       ID = Intrinsic::ppc_altivec_lvsr;
   2470       break;
   2471     }
   2472     llvm::Function *F = CGM.getIntrinsic(ID);
   2473     return Builder.CreateCall(F, Ops, "");
   2474   }
   2475 
   2476   // vec_st
   2477   case PPC::BI__builtin_altivec_stvx:
   2478   case PPC::BI__builtin_altivec_stvxl:
   2479   case PPC::BI__builtin_altivec_stvebx:
   2480   case PPC::BI__builtin_altivec_stvehx:
   2481   case PPC::BI__builtin_altivec_stvewx:
   2482   {
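             // Same rearrangement as the loads: the builtins take (value,
             // offset, pointer) and the intrinsics take (value, i8* address),
             // so fold the offset into a byte-level GEP and pop the trailing
             // pointer operand.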
   2483     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
   2484     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
   2485     Ops.pop_back();
   2486 
   2487     switch (BuiltinID) {
   2488     default: llvm_unreachable("Unsupported st intrinsic!");
   2489     case PPC::BI__builtin_altivec_stvx:
   2490       ID = Intrinsic::ppc_altivec_stvx;
   2491       break;
   2492     case PPC::BI__builtin_altivec_stvxl:
   2493       ID = Intrinsic::ppc_altivec_stvxl;
   2494       break;
   2495     case PPC::BI__builtin_altivec_stvebx:
   2496       ID = Intrinsic::ppc_altivec_stvebx;
   2497       break;
   2498     case PPC::BI__builtin_altivec_stvehx:
   2499       ID = Intrinsic::ppc_altivec_stvehx;
   2500       break;
   2501     case PPC::BI__builtin_altivec_stvewx:
   2502       ID = Intrinsic::ppc_altivec_stvewx;
   2503       break;
   2504     }
   2505     llvm::Function *F = CGM.getIntrinsic(ID);
   2506     return Builder.CreateCall(F, Ops, "");
   2507   }
   2508   }
   2509   return 0;
   2510 }
   2511