//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.getName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
      cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
                                    llvm::AtomicRMWInst::BinOp Kind,
                                    const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  return EmitFromInt(CGF, Result, T, ValueType);
}

static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
  Value *Address = CGF.EmitScalarExpr(E->getArg(1));

  // Convert the type of the pointer to a pointer to the stored type.
  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
  Value *BC = CGF.Builder.CreateBitCast(
      Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
  LV.setNontemporal(true);
  CGF.EmitStoreOfScalar(Val, LV, false);
  return nullptr;
}

static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
  Value *Address = CGF.EmitScalarExpr(E->getArg(0));

  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
  LV.setNontemporal(true);
  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
}

static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
}
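
// For illustration: a call such as __sync_fetch_and_add(&i, v) on an 'int'
// is lowered through the helpers above to roughly
//   %old = atomicrmw add i32* %i, i32 %v seq_cst
// and the old value is returned; EmitToInt/EmitFromInt only come into play
// when the operand is a pointer.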

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
      llvm::IntegerType::get(CGF.getLLVMContext(),
                             CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
      Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// @brief Utility to insert an atomic cmpxchg instruction.
///
/// @param CGF The current codegen function.
/// @param E   Builtin call expression to convert to cmpxchg.
///            arg0 - address to operate on
///            arg1 - value to compare with
///            arg2 - new value
/// @param ReturnBool Specifies whether to return success flag of
///        cmpxchg result or the old value.
///
/// @returns result of cmpxchg, according to ReturnBool
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
                                     bool ReturnBool) {
  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType = llvm::IntegerType::get(
      CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  Value *Args[3];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);

  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
      Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
      llvm::AtomicOrdering::SequentiallyConsistent);
  if (ReturnBool)
    // Extract boolean success flag and zext it to int.
    return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
                                  CGF.ConvertType(E->getType()));
  else
    // Extract old value and emit it using the same type as compare value.
    return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
                       ValueType);
}
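
// For illustration: __sync_val_compare_and_swap returns the old value
// (element 0 of the { value, success } pair produced by cmpxchg), while
// __sync_bool_compare_and_swap returns the success flag (element 1),
// zero-extended to the builtin's result type.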

// Emit a simple mangled intrinsic that has 1 argument and a return type
// matching the argument type.
static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 2 operands of the same type as its result.
static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
                                const CallExpr *E,
                                unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1 });
}

// Emit an intrinsic that has 3 operands of the same type as its result.
static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, {Src0, Src1});
}

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // We want the sign bit of the higher-order double. The bitcast we just
    // did works as if the double-double was stored to memory and then
    // read as an i128. The "store" will put the higher-order double in the
    // lower address in both little- and big-Endian modes, but the "load"
    // will treat those bits as a different part of the i128: the low bits in
    // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
    // we need to shift the high bits down to the low before truncating.
    Width >>= 1;
    if (CGF.getTarget().isBigEndian()) {
      Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
      V = CGF.Builder.CreateLShr(V, ShiftCst);
    }
    // We are truncating value in order to extract the higher-order
    // double, which we will be using to extract the sign from.
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
                      ReturnValueSlot(), Fn);
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

static Value *emitRangedBuiltin(CodeGenFunction &CGF,
                                unsigned IntrinsicID,
                                int low, int high) {
  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
  return Call;
}

namespace {
  struct WidthAndSignedness {
    unsigned Width;
    bool Signed;
  };
}

static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext &context,
                             const clang::QualType Type) {
  assert(Type->isIntegerType() && "Given type is not an integer.");
  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
  bool Signed = Type->isSignedIntegerType();
  return {Width, Signed};
}
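
// A rough worked example for EncompassingIntegerType below: combining an
// unsigned 32-bit type with a signed 16-bit type yields {Width = 33,
// Signed = true}, because a signed encompassing type must be strictly wider
// than every unsigned input type.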

// Given one or more integer types, this function produces an integer type that
// encompasses them: any value in one of the given types could be expressed in
// the encompassing type.
static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
  assert(Types.size() > 0 && "Empty list of types.");

  // If any of the given types is signed, we must return a signed type.
  bool Signed = false;
  for (const auto &Type : Types) {
    Signed |= Type.Signed;
  }

  // The encompassing type must have a width greater than or equal to the width
  // of the specified types. Additionally, if the encompassing type is signed,
  // its width must be strictly greater than the width of any unsigned types
  // given.
  unsigned Width = 0;
  for (const auto &Type : Types) {
    unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
    if (Width < MinWidth) {
      Width = MinWidth;
    }
  }

  return {Width, Signed};
}

Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
  llvm::Type *DestType = Int8PtrTy;
  if (ArgValue->getType() != DestType)
    ArgValue =
        Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());

  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
}

/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
}

static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
}

llvm::Value *
CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
                                                 llvm::IntegerType *ResType) {
  uint64_t ObjectSize;
  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
    return emitBuiltinObjectSize(E, Type, ResType);
  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
}

/// Returns a Value corresponding to the size of the given expression.
/// This Value may be either of the following:
///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
///     it)
///   - A call to the @llvm.objectsize intrinsic
llvm::Value *
CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
                                       llvm::IntegerType *ResType) {
  // We need to reference an argument if the pointer is a parameter with the
  // pass_object_size attribute.
  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
    auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
    auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
    if (Param != nullptr && PS != nullptr &&
        areBOSTypesCompatible(PS->getType(), Type)) {
      auto Iter = SizeArguments.find(Param);
      assert(Iter != SizeArguments.end());

      const ImplicitParamDecl *D = Iter->second;
      auto DIter = LocalDeclMap.find(D);
      assert(DIter != LocalDeclMap.end());

      return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
                              getContext().getSizeType(), E->getLocStart());
    }
  }

  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
  // evaluate E for side-effects. In either case, we shouldn't lower to
  // @llvm.objectsize.
  if (Type == 3 || E->HasSideEffects(getContext()))
    return getDefaultBuiltinObjectSizeResult(Type, ResType);

  // LLVM only supports 0 and 2, make sure that we pass along that
  // as a boolean.
  auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
  // FIXME: Get right address space.
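  // Note: the i1 flag computed above requests the intrinsic's "minimum"
  // semantics, so when the size cannot be determined @llvm.objectsize folds
  // to 0 rather than -1, matching getDefaultBuiltinObjectSizeResult above.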
  llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
  return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end:
    return RValue::get(
        EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
                           ? EmitScalarExpr(E->getArg(0))
                           : EmitVAListRef(E->getArg(0)).getPointer(),
                       BuiltinID != Builtin::BI__builtin_va_end));
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
    Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
                                          {DstPtr, SrcPtr}));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
        Builder.CreateICmpSGE(ArgValue,
                              llvm::Constant::getNullValue(ArgValue->getType()),
                              "abscond");
    Value *Result =
        Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
  }
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_copysign:
  case Builtin::BI__builtin_copysignf:
  case Builtin::BI__builtin_copysignl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
  }
  case Builtin::BI__builtin_ceil:
  case Builtin::BI__builtin_ceilf:
  case Builtin::BI__builtin_ceill: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
  }
  case Builtin::BI__builtin_floor:
  case Builtin::BI__builtin_floorf:
  case Builtin::BI__builtin_floorl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
  }
  case Builtin::BI__builtin_trunc:
  case Builtin::BI__builtin_truncf:
  case Builtin::BI__builtin_truncl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
  }
  case Builtin::BI__builtin_rint:
  case Builtin::BI__builtin_rintf:
  case Builtin::BI__builtin_rintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
  }
  case Builtin::BI__builtin_nearbyint:
  case Builtin::BI__builtin_nearbyintf:
  case Builtin::BI__builtin_nearbyintl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
  }
  case Builtin::BI__builtin_round:
  case Builtin::BI__builtin_roundf:
  case Builtin::BI__builtin_roundl: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
  }
  case Builtin::BI__builtin_fmin:
  case Builtin::BI__builtin_fminf:
  case Builtin::BI__builtin_fminl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
  }
  case Builtin::BI__builtin_fmax:
  case Builtin::BI__builtin_fmaxf:
  case Builtin::BI__builtin_fmaxl: {
    return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
  }
  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
        Imag->getType()->isFPOrFPVectorTy()
            ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
            : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp =
        Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
                          llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_unpredictable: {
    // Always return the argument of __builtin_unpredictable. LLVM does not
    // handle this builtin. Metadata for this builtin should be added directly
    // to instructions such as branches or switches that use it.
    return RValue::get(EmitScalarExpr(E->getArg(0)));
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result =
        Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
        (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
  }
  case Builtin::BI__builtin_bitreverse8:
  case Builtin::BI__builtin_bitreverse16:
  case Builtin::BI__builtin_bitreverse32:
  case Builtin::BI__builtin_bitreverse64: {
    return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
  }
  case Builtin::BI__builtin_object_size: {
    unsigned Type =
        E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
    auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));

    // We pass this builtin onto the optimizer so that it can figure out the
    // object size in more complex cases.
    return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall(F, {Begin, End}));
  }
  case Builtin::BI__builtin_trap:
    return RValue::get(EmitTrapCall(Intrinsic::trap));
  case Builtin::BI__debugbreak:
    return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
                None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
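    // (Without a fresh block, any code emitted after the terminator above
    // would have nowhere to go; the "unreachable.cont" block keeps the
    // builder in a valid, if dead, position.)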
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf:
  case Builtin::BI__builtin_isfinite: {
    // isinf(x)    --> fabs(x) == infinity
    // isfinite(x) --> fabs(x) != infinity
    // x != NaN via the ordered compare in either case.
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Fabs = EmitFAbs(*this, V);
    Constant *Infinity = ConstantFP::getInfinity(V->getType());
    CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
                                  ? CmpInst::FCMP_OEQ
                                  : CmpInst::FCMP_ONE;
    Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
    return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
        Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                          "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
        Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                              "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
        Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                              "isnormal");
    Value *NormalResult =
        Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                             EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemcpy:
  case Builtin::BI__builtin_memcpy: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BI__builtin___memcpy_chk: {
    // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemCpy(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
    Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  DestAddr, SrcAddr, SizeVal);
    return RValue::get(DestAddr.getPointer());
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
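    // For example, __builtin___memmove_chk(d, s, 16, 32) can be emitted as a
    // plain 16-byte memmove. If either size is not a compile-time constant,
    // or the copy would overflow the destination, we break out of the switch
    // and the builtin is emitted like an ordinary library call instead.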
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Address Src = EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
                        E->getArg(1)->getExprLoc(), FD, 1);
    Builder.CreateMemMove(Dest, Src, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
                        E->getArg(0)->getExprLoc(), FD, 0);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    Address Dest = EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
    return RValue::get(Dest.getPointer());
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend? Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth =
        CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                    ? Intrinsic::eh_return_i32
                                    : Intrinsic::eh_return_i64);
    Builder.CreateCall(F, {Int, Ptr});
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms. Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
    Address Buf = EmitPointerWithAlignment(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
        Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Address StackSaveSlot =
      Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);

  // Clang extensions: not overloaded yet.
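  // These lower to the corresponding atomicrmw operation; for example,
  // __sync_fetch_and_min(&i, x) on a signed int becomes roughly
  //   atomicrmw min i32* %i, i32 %x seq_cst
  // returning the previous value of *i.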
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16:
    return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
                                 StoreSize);
    Store->setAtomic(llvm::AtomicOrdering::Release);
    return RValue::get(nullptr);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_nontemporal_load:
    return RValue::get(EmitNontemporalLoad(*this, E));
  case Builtin::BI__builtin_nontemporal_store:
    return RValue::get(EmitNontemporalStore(*this, E));
  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }

  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
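    // When the memory-order argument is a compile-time constant we can emit a
    // single atomicrmw xchg of the byte value 1 with the matching ordering;
    // otherwise the code below dispatches through a switch over the possible
    // orderings.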
1399 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1400 bool Volatile = 1401 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1402 1403 Value *Ptr = EmitScalarExpr(E->getArg(0)); 1404 unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace(); 1405 Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace)); 1406 Value *NewVal = Builder.getInt8(1); 1407 Value *Order = EmitScalarExpr(E->getArg(1)); 1408 if (isa<llvm::ConstantInt>(Order)) { 1409 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1410 AtomicRMWInst *Result = nullptr; 1411 switch (ord) { 1412 case 0: // memory_order_relaxed 1413 default: // invalid order 1414 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1415 llvm::AtomicOrdering::Monotonic); 1416 break; 1417 case 1: // memory_order_consume 1418 case 2: // memory_order_acquire 1419 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1420 llvm::AtomicOrdering::Acquire); 1421 break; 1422 case 3: // memory_order_release 1423 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1424 llvm::AtomicOrdering::Release); 1425 break; 1426 case 4: // memory_order_acq_rel 1427 1428 Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1429 llvm::AtomicOrdering::AcquireRelease); 1430 break; 1431 case 5: // memory_order_seq_cst 1432 Result = Builder.CreateAtomicRMW( 1433 llvm::AtomicRMWInst::Xchg, Ptr, NewVal, 1434 llvm::AtomicOrdering::SequentiallyConsistent); 1435 break; 1436 } 1437 Result->setVolatile(Volatile); 1438 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1439 } 1440 1441 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1442 1443 llvm::BasicBlock *BBs[5] = { 1444 createBasicBlock("monotonic", CurFn), 1445 createBasicBlock("acquire", CurFn), 1446 createBasicBlock("release", CurFn), 1447 createBasicBlock("acqrel", CurFn), 1448 createBasicBlock("seqcst", CurFn) 1449 }; 1450 llvm::AtomicOrdering Orders[5] = { 1451 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire, 1452 llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease, 1453 llvm::AtomicOrdering::SequentiallyConsistent}; 1454 1455 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1456 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1457 1458 Builder.SetInsertPoint(ContBB); 1459 PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set"); 1460 1461 for (unsigned i = 0; i < 5; ++i) { 1462 Builder.SetInsertPoint(BBs[i]); 1463 AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, 1464 Ptr, NewVal, Orders[i]); 1465 RMW->setVolatile(Volatile); 1466 Result->addIncoming(RMW, BBs[i]); 1467 Builder.CreateBr(ContBB); 1468 } 1469 1470 SI->addCase(Builder.getInt32(0), BBs[0]); 1471 SI->addCase(Builder.getInt32(1), BBs[1]); 1472 SI->addCase(Builder.getInt32(2), BBs[1]); 1473 SI->addCase(Builder.getInt32(3), BBs[2]); 1474 SI->addCase(Builder.getInt32(4), BBs[3]); 1475 SI->addCase(Builder.getInt32(5), BBs[4]); 1476 1477 Builder.SetInsertPoint(ContBB); 1478 return RValue::get(Builder.CreateIsNotNull(Result, "tobool")); 1479 } 1480 1481 case Builtin::BI__atomic_clear: { 1482 QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); 1483 bool Volatile = 1484 PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified(); 1485 1486 Address Ptr = EmitPointerWithAlignment(E->getArg(0)); 1487 unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace(); 1488 Ptr = Builder.CreateBitCast(Ptr, 
Int8Ty->getPointerTo(AddrSpace)); 1489 Value *NewVal = Builder.getInt8(0); 1490 Value *Order = EmitScalarExpr(E->getArg(1)); 1491 if (isa<llvm::ConstantInt>(Order)) { 1492 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1493 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1494 switch (ord) { 1495 case 0: // memory_order_relaxed 1496 default: // invalid order 1497 Store->setOrdering(llvm::AtomicOrdering::Monotonic); 1498 break; 1499 case 3: // memory_order_release 1500 Store->setOrdering(llvm::AtomicOrdering::Release); 1501 break; 1502 case 5: // memory_order_seq_cst 1503 Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent); 1504 break; 1505 } 1506 return RValue::get(nullptr); 1507 } 1508 1509 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1510 1511 llvm::BasicBlock *BBs[3] = { 1512 createBasicBlock("monotonic", CurFn), 1513 createBasicBlock("release", CurFn), 1514 createBasicBlock("seqcst", CurFn) 1515 }; 1516 llvm::AtomicOrdering Orders[3] = { 1517 llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release, 1518 llvm::AtomicOrdering::SequentiallyConsistent}; 1519 1520 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1521 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]); 1522 1523 for (unsigned i = 0; i < 3; ++i) { 1524 Builder.SetInsertPoint(BBs[i]); 1525 StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); 1526 Store->setOrdering(Orders[i]); 1527 Builder.CreateBr(ContBB); 1528 } 1529 1530 SI->addCase(Builder.getInt32(0), BBs[0]); 1531 SI->addCase(Builder.getInt32(3), BBs[1]); 1532 SI->addCase(Builder.getInt32(5), BBs[2]); 1533 1534 Builder.SetInsertPoint(ContBB); 1535 return RValue::get(nullptr); 1536 } 1537 1538 case Builtin::BI__atomic_thread_fence: 1539 case Builtin::BI__atomic_signal_fence: 1540 case Builtin::BI__c11_atomic_thread_fence: 1541 case Builtin::BI__c11_atomic_signal_fence: { 1542 llvm::SynchronizationScope Scope; 1543 if (BuiltinID == Builtin::BI__atomic_signal_fence || 1544 BuiltinID == Builtin::BI__c11_atomic_signal_fence) 1545 Scope = llvm::SingleThread; 1546 else 1547 Scope = llvm::CrossThread; 1548 Value *Order = EmitScalarExpr(E->getArg(0)); 1549 if (isa<llvm::ConstantInt>(Order)) { 1550 int ord = cast<llvm::ConstantInt>(Order)->getZExtValue(); 1551 switch (ord) { 1552 case 0: // memory_order_relaxed 1553 default: // invalid order 1554 break; 1555 case 1: // memory_order_consume 1556 case 2: // memory_order_acquire 1557 Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1558 break; 1559 case 3: // memory_order_release 1560 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1561 break; 1562 case 4: // memory_order_acq_rel 1563 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1564 break; 1565 case 5: // memory_order_seq_cst 1566 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, 1567 Scope); 1568 break; 1569 } 1570 return RValue::get(nullptr); 1571 } 1572 1573 llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB; 1574 AcquireBB = createBasicBlock("acquire", CurFn); 1575 ReleaseBB = createBasicBlock("release", CurFn); 1576 AcqRelBB = createBasicBlock("acqrel", CurFn); 1577 SeqCstBB = createBasicBlock("seqcst", CurFn); 1578 llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn); 1579 1580 Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false); 1581 llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB); 1582 1583 Builder.SetInsertPoint(AcquireBB); 1584 
Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope); 1585 Builder.CreateBr(ContBB); 1586 SI->addCase(Builder.getInt32(1), AcquireBB); 1587 SI->addCase(Builder.getInt32(2), AcquireBB); 1588 1589 Builder.SetInsertPoint(ReleaseBB); 1590 Builder.CreateFence(llvm::AtomicOrdering::Release, Scope); 1591 Builder.CreateBr(ContBB); 1592 SI->addCase(Builder.getInt32(3), ReleaseBB); 1593 1594 Builder.SetInsertPoint(AcqRelBB); 1595 Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope); 1596 Builder.CreateBr(ContBB); 1597 SI->addCase(Builder.getInt32(4), AcqRelBB); 1598 1599 Builder.SetInsertPoint(SeqCstBB); 1600 Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope); 1601 Builder.CreateBr(ContBB); 1602 SI->addCase(Builder.getInt32(5), SeqCstBB); 1603 1604 Builder.SetInsertPoint(ContBB); 1605 return RValue::get(nullptr); 1606 } 1607 1608 // Library functions with special handling. 1609 case Builtin::BIsqrt: 1610 case Builtin::BIsqrtf: 1611 case Builtin::BIsqrtl: { 1612 // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only 1613 // in finite- or unsafe-math mode (the intrinsic has different semantics 1614 // for handling negative numbers compared to the library function, so 1615 // -fmath-errno=0 is not enough). 1616 if (!FD->hasAttr<ConstAttr>()) 1617 break; 1618 if (!(CGM.getCodeGenOpts().UnsafeFPMath || 1619 CGM.getCodeGenOpts().NoNaNsFPMath)) 1620 break; 1621 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 1622 llvm::Type *ArgType = Arg0->getType(); 1623 Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType); 1624 return RValue::get(Builder.CreateCall(F, Arg0)); 1625 } 1626 1627 case Builtin::BI__builtin_pow: 1628 case Builtin::BI__builtin_powf: 1629 case Builtin::BI__builtin_powl: 1630 case Builtin::BIpow: 1631 case Builtin::BIpowf: 1632 case Builtin::BIpowl: { 1633 // Transform a call to pow* into a @llvm.pow.* intrinsic call. 1634 if (!FD->hasAttr<ConstAttr>()) 1635 break; 1636 Value *Base = EmitScalarExpr(E->getArg(0)); 1637 Value *Exponent = EmitScalarExpr(E->getArg(1)); 1638 llvm::Type *ArgType = Base->getType(); 1639 Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType); 1640 return RValue::get(Builder.CreateCall(F, {Base, Exponent})); 1641 } 1642 1643 case Builtin::BIfma: 1644 case Builtin::BIfmaf: 1645 case Builtin::BIfmal: 1646 case Builtin::BI__builtin_fma: 1647 case Builtin::BI__builtin_fmaf: 1648 case Builtin::BI__builtin_fmal: { 1649 // Rewrite fma to intrinsic. 1650 Value *FirstArg = EmitScalarExpr(E->getArg(0)); 1651 llvm::Type *ArgType = FirstArg->getType(); 1652 Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType); 1653 return RValue::get( 1654 Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)), 1655 EmitScalarExpr(E->getArg(2))})); 1656 } 1657 1658 case Builtin::BI__builtin_signbit: 1659 case Builtin::BI__builtin_signbitf: 1660 case Builtin::BI__builtin_signbitl: { 1661 return RValue::get( 1662 Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), 1663 ConvertType(E->getType()))); 1664 } 1665 case Builtin::BI__builtin_annotation: { 1666 llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0)); 1667 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation, 1668 AnnVal->getType()); 1669 1670 // Get the annotation string, go through casts. Sema requires this to be a 1671 // non-wide string literal, potentially casted, so the cast<> is safe. 
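// Illustrative sketch (hypothetical example, not from the original source):
// for a 32-bit value v, a call like
//   __builtin_annotation(v, "my_note")
// is expected to become a call to the @llvm.annotation.i32 intrinsic, with
// the annotation string and the source location materialized as the extra
// pointer/integer operands by EmitAnnotationCall below.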
1672 const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts(); 1673 StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString(); 1674 return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc())); 1675 } 1676 case Builtin::BI__builtin_addcb: 1677 case Builtin::BI__builtin_addcs: 1678 case Builtin::BI__builtin_addc: 1679 case Builtin::BI__builtin_addcl: 1680 case Builtin::BI__builtin_addcll: 1681 case Builtin::BI__builtin_subcb: 1682 case Builtin::BI__builtin_subcs: 1683 case Builtin::BI__builtin_subc: 1684 case Builtin::BI__builtin_subcl: 1685 case Builtin::BI__builtin_subcll: { 1686 1687 // We translate all of these builtins from expressions of the form: 1688 // int x = ..., y = ..., carryin = ..., carryout, result; 1689 // result = __builtin_addc(x, y, carryin, &carryout); 1690 // 1691 // to LLVM IR of the form: 1692 // 1693 // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y) 1694 // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0 1695 // %carry1 = extractvalue {i32, i1} %tmp1, 1 1696 // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1, 1697 // i32 %carryin) 1698 // %result = extractvalue {i32, i1} %tmp2, 0 1699 // %carry2 = extractvalue {i32, i1} %tmp2, 1 1700 // %tmp3 = or i1 %carry1, %carry2 1701 // %tmp4 = zext i1 %tmp3 to i32 1702 // store i32 %tmp4, i32* %carryout 1703 1704 // Scalarize our inputs. 1705 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1706 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1707 llvm::Value *Carryin = EmitScalarExpr(E->getArg(2)); 1708 Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3)); 1709 1710 // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow. 1711 llvm::Intrinsic::ID IntrinsicId; 1712 switch (BuiltinID) { 1713 default: llvm_unreachable("Unknown multiprecision builtin id."); 1714 case Builtin::BI__builtin_addcb: 1715 case Builtin::BI__builtin_addcs: 1716 case Builtin::BI__builtin_addc: 1717 case Builtin::BI__builtin_addcl: 1718 case Builtin::BI__builtin_addcll: 1719 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1720 break; 1721 case Builtin::BI__builtin_subcb: 1722 case Builtin::BI__builtin_subcs: 1723 case Builtin::BI__builtin_subc: 1724 case Builtin::BI__builtin_subcl: 1725 case Builtin::BI__builtin_subcll: 1726 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1727 break; 1728 } 1729 1730 // Construct our resulting LLVM IR expression. 
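// Illustrative (hypothetical) usage of these builtins, e.g. a two-limb add
// with 64-bit limbs and caller-chosen variable names:
//   unsigned long long c1, c2;
//   unsigned long long lo = __builtin_addcll(a_lo, b_lo, 0,  &c1);
//   unsigned long long hi = __builtin_addcll(a_hi, b_hi, c1, &c2);
// Each such call expands into the two chained @llvm.uadd.with.overflow calls
// sketched in the comment above, with the two carry bits OR'd together.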
1731 llvm::Value *Carry1; 1732 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1733 X, Y, Carry1); 1734 llvm::Value *Carry2; 1735 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1736 Sum1, Carryin, Carry2); 1737 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1738 X->getType()); 1739 Builder.CreateStore(CarryOut, CarryOutPtr); 1740 return RValue::get(Sum2); 1741 } 1742 1743 case Builtin::BI__builtin_add_overflow: 1744 case Builtin::BI__builtin_sub_overflow: 1745 case Builtin::BI__builtin_mul_overflow: { 1746 const clang::Expr *LeftArg = E->getArg(0); 1747 const clang::Expr *RightArg = E->getArg(1); 1748 const clang::Expr *ResultArg = E->getArg(2); 1749 1750 clang::QualType ResultQTy = 1751 ResultArg->getType()->castAs<PointerType>()->getPointeeType(); 1752 1753 WidthAndSignedness LeftInfo = 1754 getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType()); 1755 WidthAndSignedness RightInfo = 1756 getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType()); 1757 WidthAndSignedness ResultInfo = 1758 getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy); 1759 WidthAndSignedness EncompassingInfo = 1760 EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo}); 1761 1762 llvm::Type *EncompassingLLVMTy = 1763 llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width); 1764 1765 llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy); 1766 1767 llvm::Intrinsic::ID IntrinsicId; 1768 switch (BuiltinID) { 1769 default: 1770 llvm_unreachable("Unknown overflow builtin id."); 1771 case Builtin::BI__builtin_add_overflow: 1772 IntrinsicId = EncompassingInfo.Signed 1773 ? llvm::Intrinsic::sadd_with_overflow 1774 : llvm::Intrinsic::uadd_with_overflow; 1775 break; 1776 case Builtin::BI__builtin_sub_overflow: 1777 IntrinsicId = EncompassingInfo.Signed 1778 ? llvm::Intrinsic::ssub_with_overflow 1779 : llvm::Intrinsic::usub_with_overflow; 1780 break; 1781 case Builtin::BI__builtin_mul_overflow: 1782 IntrinsicId = EncompassingInfo.Signed 1783 ? llvm::Intrinsic::smul_with_overflow 1784 : llvm::Intrinsic::umul_with_overflow; 1785 break; 1786 } 1787 1788 llvm::Value *Left = EmitScalarExpr(LeftArg); 1789 llvm::Value *Right = EmitScalarExpr(RightArg); 1790 Address ResultPtr = EmitPointerWithAlignment(ResultArg); 1791 1792 // Extend each operand to the encompassing type. 1793 Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); 1794 Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed); 1795 1796 // Perform the operation on the extended values. 1797 llvm::Value *Overflow, *Result; 1798 Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow); 1799 1800 if (EncompassingInfo.Width > ResultInfo.Width) { 1801 // The encompassing type is wider than the result type, so we need to 1802 // truncate it. 1803 llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy); 1804 1805 // To see if the truncation caused an overflow, we will extend 1806 // the result and then compare it to the original result. 1807 llvm::Value *ResultTruncExt = Builder.CreateIntCast( 1808 ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed); 1809 llvm::Value *TruncationOverflow = 1810 Builder.CreateICmpNE(Result, ResultTruncExt); 1811 1812 Overflow = Builder.CreateOr(Overflow, TruncationOverflow); 1813 Result = ResultTrunc; 1814 } 1815 1816 // Finally, store the result using the pointer. 
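// Illustrative sketch (hypothetical example; assumes 64-bit long and 32-bit
// int): for
//   long a, b; int r;
//   bool ov = __builtin_add_overflow(a, b, &r);
// all operands are signed, the encompassing type is i64, and the emitted IR
// is expected to look roughly like
//   %s  = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
//   %v  = extractvalue { i64, i1 } %s, 0
//   %o1 = extractvalue { i64, i1 } %s, 1
//   %t  = trunc i64 %v to i32
//   %e  = sext i32 %t to i64
//   %o2 = icmp ne i64 %v, %e        ; did the truncation lose information?
//   %ov = or i1 %o1, %o2
//   store i32 %t, i32* %r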
1817 bool isVolatile = 1818 ResultArg->getType()->getPointeeType().isVolatileQualified(); 1819 Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile); 1820 1821 return RValue::get(Overflow); 1822 } 1823 1824 case Builtin::BI__builtin_uadd_overflow: 1825 case Builtin::BI__builtin_uaddl_overflow: 1826 case Builtin::BI__builtin_uaddll_overflow: 1827 case Builtin::BI__builtin_usub_overflow: 1828 case Builtin::BI__builtin_usubl_overflow: 1829 case Builtin::BI__builtin_usubll_overflow: 1830 case Builtin::BI__builtin_umul_overflow: 1831 case Builtin::BI__builtin_umull_overflow: 1832 case Builtin::BI__builtin_umulll_overflow: 1833 case Builtin::BI__builtin_sadd_overflow: 1834 case Builtin::BI__builtin_saddl_overflow: 1835 case Builtin::BI__builtin_saddll_overflow: 1836 case Builtin::BI__builtin_ssub_overflow: 1837 case Builtin::BI__builtin_ssubl_overflow: 1838 case Builtin::BI__builtin_ssubll_overflow: 1839 case Builtin::BI__builtin_smul_overflow: 1840 case Builtin::BI__builtin_smull_overflow: 1841 case Builtin::BI__builtin_smulll_overflow: { 1842 1843 // We translate all of these builtins directly to the relevant llvm IR node. 1844 1845 // Scalarize our inputs. 1846 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1847 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1848 Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2)); 1849 1850 // Decide which of the overflow intrinsics we are lowering to: 1851 llvm::Intrinsic::ID IntrinsicId; 1852 switch (BuiltinID) { 1853 default: llvm_unreachable("Unknown overflow builtin id."); 1854 case Builtin::BI__builtin_uadd_overflow: 1855 case Builtin::BI__builtin_uaddl_overflow: 1856 case Builtin::BI__builtin_uaddll_overflow: 1857 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1858 break; 1859 case Builtin::BI__builtin_usub_overflow: 1860 case Builtin::BI__builtin_usubl_overflow: 1861 case Builtin::BI__builtin_usubll_overflow: 1862 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1863 break; 1864 case Builtin::BI__builtin_umul_overflow: 1865 case Builtin::BI__builtin_umull_overflow: 1866 case Builtin::BI__builtin_umulll_overflow: 1867 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1868 break; 1869 case Builtin::BI__builtin_sadd_overflow: 1870 case Builtin::BI__builtin_saddl_overflow: 1871 case Builtin::BI__builtin_saddll_overflow: 1872 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1873 break; 1874 case Builtin::BI__builtin_ssub_overflow: 1875 case Builtin::BI__builtin_ssubl_overflow: 1876 case Builtin::BI__builtin_ssubll_overflow: 1877 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1878 break; 1879 case Builtin::BI__builtin_smul_overflow: 1880 case Builtin::BI__builtin_smull_overflow: 1881 case Builtin::BI__builtin_smulll_overflow: 1882 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1883 break; 1884 } 1885 1886 1887 llvm::Value *Carry; 1888 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 1889 Builder.CreateStore(Sum, SumOutPtr); 1890 1891 return RValue::get(Carry); 1892 } 1893 case Builtin::BI__builtin_addressof: 1894 return RValue::get(EmitLValue(E->getArg(0)).getPointer()); 1895 case Builtin::BI__builtin_operator_new: 1896 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1897 E->getArg(0), false); 1898 case Builtin::BI__builtin_operator_delete: 1899 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1900 E->getArg(0), true); 1901 case Builtin::BI__noop: 1902 // __noop always evaluates to an integer literal zero. 
1903 return RValue::get(ConstantInt::get(IntTy, 0)); 1904 case Builtin::BI__builtin_call_with_static_chain: { 1905 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 1906 const Expr *Chain = E->getArg(1); 1907 return EmitCall(Call->getCallee()->getType(), 1908 EmitScalarExpr(Call->getCallee()), Call, ReturnValue, 1909 Call->getCalleeDecl(), EmitScalarExpr(Chain)); 1910 } 1911 case Builtin::BI_InterlockedExchange: 1912 case Builtin::BI_InterlockedExchangePointer: 1913 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1914 case Builtin::BI_InterlockedCompareExchangePointer: { 1915 llvm::Type *RTy; 1916 llvm::IntegerType *IntType = 1917 IntegerType::get(getLLVMContext(), 1918 getContext().getTypeSize(E->getType())); 1919 llvm::Type *IntPtrType = IntType->getPointerTo(); 1920 1921 llvm::Value *Destination = 1922 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1923 1924 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1925 RTy = Exchange->getType(); 1926 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1927 1928 llvm::Value *Comparand = 1929 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1930 1931 auto Result = 1932 Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1933 AtomicOrdering::SequentiallyConsistent, 1934 AtomicOrdering::SequentiallyConsistent); 1935 Result->setVolatile(true); 1936 1937 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1938 0), 1939 RTy)); 1940 } 1941 case Builtin::BI_InterlockedCompareExchange: { 1942 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1943 EmitScalarExpr(E->getArg(0)), 1944 EmitScalarExpr(E->getArg(2)), 1945 EmitScalarExpr(E->getArg(1)), 1946 AtomicOrdering::SequentiallyConsistent, 1947 AtomicOrdering::SequentiallyConsistent); 1948 CXI->setVolatile(true); 1949 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1950 } 1951 case Builtin::BI_InterlockedIncrement: { 1952 llvm::Type *IntTy = ConvertType(E->getType()); 1953 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1954 AtomicRMWInst::Add, 1955 EmitScalarExpr(E->getArg(0)), 1956 ConstantInt::get(IntTy, 1), 1957 llvm::AtomicOrdering::SequentiallyConsistent); 1958 RMWI->setVolatile(true); 1959 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1))); 1960 } 1961 case Builtin::BI_InterlockedDecrement: { 1962 llvm::Type *IntTy = ConvertType(E->getType()); 1963 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1964 AtomicRMWInst::Sub, 1965 EmitScalarExpr(E->getArg(0)), 1966 ConstantInt::get(IntTy, 1), 1967 llvm::AtomicOrdering::SequentiallyConsistent); 1968 RMWI->setVolatile(true); 1969 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1))); 1970 } 1971 case Builtin::BI_InterlockedExchangeAdd: { 1972 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1973 AtomicRMWInst::Add, 1974 EmitScalarExpr(E->getArg(0)), 1975 EmitScalarExpr(E->getArg(1)), 1976 llvm::AtomicOrdering::SequentiallyConsistent); 1977 RMWI->setVolatile(true); 1978 return RValue::get(RMWI); 1979 } 1980 case Builtin::BI__readfsdword: { 1981 llvm::Type *IntTy = ConvertType(E->getType()); 1982 Value *IntToPtr = 1983 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 1984 llvm::PointerType::get(IntTy, 257)); 1985 LoadInst *Load = 1986 Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true); 1987 return RValue::get(Load); 1988 } 1989 1990 case Builtin::BI__exception_code: 1991 case Builtin::BI_exception_code: 1992 return RValue::get(EmitSEHExceptionCode()); 1993 case Builtin::BI__exception_info: 1994 case 
Builtin::BI_exception_info: 1995 return RValue::get(EmitSEHExceptionInfo()); 1996 case Builtin::BI__abnormal_termination: 1997 case Builtin::BI_abnormal_termination: 1998 return RValue::get(EmitSEHAbnormalTermination()); 1999 case Builtin::BI_setjmpex: { 2000 if (getTarget().getTriple().isOSMSVCRT()) { 2001 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2002 llvm::AttributeSet ReturnsTwiceAttr = 2003 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2004 llvm::Attribute::ReturnsTwice); 2005 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 2006 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2007 "_setjmpex", ReturnsTwiceAttr); 2008 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2009 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2010 llvm::Value *FrameAddr = 2011 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2012 ConstantInt::get(Int32Ty, 0)); 2013 llvm::Value *Args[] = {Buf, FrameAddr}; 2014 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 2015 CS.setAttributes(ReturnsTwiceAttr); 2016 return RValue::get(CS.getInstruction()); 2017 } 2018 break; 2019 } 2020 case Builtin::BI_setjmp: { 2021 if (getTarget().getTriple().isOSMSVCRT()) { 2022 llvm::AttributeSet ReturnsTwiceAttr = 2023 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 2024 llvm::Attribute::ReturnsTwice); 2025 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 2026 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 2027 llvm::CallSite CS; 2028 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 2029 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 2030 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 2031 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 2032 "_setjmp3", ReturnsTwiceAttr); 2033 llvm::Value *Count = ConstantInt::get(IntTy, 0); 2034 llvm::Value *Args[] = {Buf, Count}; 2035 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 2036 } else { 2037 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 2038 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 2039 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 2040 "_setjmp", ReturnsTwiceAttr); 2041 llvm::Value *FrameAddr = 2042 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 2043 ConstantInt::get(Int32Ty, 0)); 2044 llvm::Value *Args[] = {Buf, FrameAddr}; 2045 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 2046 } 2047 CS.setAttributes(ReturnsTwiceAttr); 2048 return RValue::get(CS.getInstruction()); 2049 } 2050 break; 2051 } 2052 2053 case Builtin::BI__GetExceptionInfo: { 2054 if (llvm::GlobalVariable *GV = 2055 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 2056 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 2057 break; 2058 } 2059 2060 // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions 2061 case Builtin::BIread_pipe: 2062 case Builtin::BIwrite_pipe: { 2063 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2064 *Arg1 = EmitScalarExpr(E->getArg(1)); 2065 2066 // Type of the generic packet parameter. 2067 unsigned GenericAS = 2068 getContext().getTargetAddressSpace(LangAS::opencl_generic); 2069 llvm::Type *I8PTy = llvm::PointerType::get( 2070 llvm::Type::getInt8Ty(getLLVMContext()), GenericAS); 2071 2072 // Testing which overloaded version we should generate the call for. 2073 if (2U == E->getNumArgs()) { 2074 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2" 2075 : "__write_pipe_2"; 2076 // Creating a generic function type to be able to call with any builtin or 2077 // user defined type. 
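// Illustrative sketch (hypothetical names, not from the original source):
// for the two-argument form, a call like
//   read_pipe(p, &val)
// is expected to lower to
//   call i32 @__read_pipe_2(<pipe type> %p, i8 addrspace(4)* %val.cast)
// where the packet pointer is cast to an i8 pointer in the generic address
// space; write_pipe maps to __write_pipe_2 in the same way.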
2078 llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy}; 2079 llvm::FunctionType *FTy = llvm::FunctionType::get( 2080 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2081 Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy); 2082 return RValue::get(Builder.CreateCall( 2083 CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast})); 2084 } else { 2085 assert(4 == E->getNumArgs() && 2086 "Illegal number of parameters to pipe function"); 2087 const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4" 2088 : "__write_pipe_4"; 2089 2090 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy}; 2091 Value *Arg2 = EmitScalarExpr(E->getArg(2)), 2092 *Arg3 = EmitScalarExpr(E->getArg(3)); 2093 llvm::FunctionType *FTy = llvm::FunctionType::get( 2094 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2095 Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy); 2096 // We know the third argument is an integer type, but we may need to cast 2097 // it to i32. 2098 if (Arg2->getType() != Int32Ty) 2099 Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty); 2100 return RValue::get(Builder.CreateCall( 2101 CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast})); 2102 } 2103 } 2104 // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write 2105 // functions 2106 case Builtin::BIreserve_read_pipe: 2107 case Builtin::BIreserve_write_pipe: 2108 case Builtin::BIwork_group_reserve_read_pipe: 2109 case Builtin::BIwork_group_reserve_write_pipe: 2110 case Builtin::BIsub_group_reserve_read_pipe: 2111 case Builtin::BIsub_group_reserve_write_pipe: { 2112 // Composing the mangled name for the function. 2113 const char *Name; 2114 if (BuiltinID == Builtin::BIreserve_read_pipe) 2115 Name = "__reserve_read_pipe"; 2116 else if (BuiltinID == Builtin::BIreserve_write_pipe) 2117 Name = "__reserve_write_pipe"; 2118 else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe) 2119 Name = "__work_group_reserve_read_pipe"; 2120 else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe) 2121 Name = "__work_group_reserve_write_pipe"; 2122 else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe) 2123 Name = "__sub_group_reserve_read_pipe"; 2124 else 2125 Name = "__sub_group_reserve_write_pipe"; 2126 2127 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2128 *Arg1 = EmitScalarExpr(E->getArg(1)); 2129 llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy); 2130 2131 // Building the generic function prototype. 2132 llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty}; 2133 llvm::FunctionType *FTy = llvm::FunctionType::get( 2134 ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2135 // We know the second argument is an integer type, but we may need to cast 2136 // it to i32. 
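// Illustrative sketch (hypothetical names): a call like
//   reserve_read_pipe(p, n)
// is expected to lower to
//   call <reserve_id type> @__reserve_read_pipe(<pipe type> %p, i32 %n)
// with %n zero-extended or truncated to i32 as needed below; the other five
// builtins differ only in the runtime function name chosen above.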
2137 if (Arg1->getType() != Int32Ty) 2138 Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty); 2139 return RValue::get( 2140 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); 2141 } 2142 // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write 2143 // functions 2144 case Builtin::BIcommit_read_pipe: 2145 case Builtin::BIcommit_write_pipe: 2146 case Builtin::BIwork_group_commit_read_pipe: 2147 case Builtin::BIwork_group_commit_write_pipe: 2148 case Builtin::BIsub_group_commit_read_pipe: 2149 case Builtin::BIsub_group_commit_write_pipe: { 2150 const char *Name; 2151 if (BuiltinID == Builtin::BIcommit_read_pipe) 2152 Name = "__commit_read_pipe"; 2153 else if (BuiltinID == Builtin::BIcommit_write_pipe) 2154 Name = "__commit_write_pipe"; 2155 else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe) 2156 Name = "__work_group_commit_read_pipe"; 2157 else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe) 2158 Name = "__work_group_commit_write_pipe"; 2159 else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe) 2160 Name = "__sub_group_commit_read_pipe"; 2161 else 2162 Name = "__sub_group_commit_write_pipe"; 2163 2164 Value *Arg0 = EmitScalarExpr(E->getArg(0)), 2165 *Arg1 = EmitScalarExpr(E->getArg(1)); 2166 2167 // Building the generic function prototype. 2168 llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()}; 2169 llvm::FunctionType *FTy = 2170 llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()), 2171 llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2172 2173 return RValue::get( 2174 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1})); 2175 } 2176 // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions 2177 case Builtin::BIget_pipe_num_packets: 2178 case Builtin::BIget_pipe_max_packets: { 2179 const char *Name; 2180 if (BuiltinID == Builtin::BIget_pipe_num_packets) 2181 Name = "__get_pipe_num_packets"; 2182 else 2183 Name = "__get_pipe_max_packets"; 2184 2185 // Building the generic function prototype. 2186 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 2187 llvm::Type *ArgTys[] = {Arg0->getType()}; 2188 llvm::FunctionType *FTy = llvm::FunctionType::get( 2189 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2190 2191 return RValue::get( 2192 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0})); 2193 } 2194 2195 // OpenCL v2.0 s6.13.9 - Address space qualifier functions. 2196 case Builtin::BIto_global: 2197 case Builtin::BIto_local: 2198 case Builtin::BIto_private: { 2199 auto Arg0 = EmitScalarExpr(E->getArg(0)); 2200 auto NewArgT = llvm::PointerType::get(Int8Ty, 2201 CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic)); 2202 auto NewRetT = llvm::PointerType::get(Int8Ty, 2203 CGM.getContext().getTargetAddressSpace( 2204 E->getType()->getPointeeType().getAddressSpace())); 2205 auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false); 2206 llvm::Value *NewArg; 2207 if (Arg0->getType()->getPointerAddressSpace() != 2208 NewArgT->getPointerAddressSpace()) 2209 NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT); 2210 else 2211 NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT); 2212 auto NewCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, 2213 E->getDirectCallee()->getName()), {NewArg}); 2214 return RValue::get(Builder.CreateBitOrPointerCast(NewCall, 2215 ConvertType(E->getType()))); 2216 } 2217 2218 // OpenCL v2.0, s6.13.17 - Enqueue kernel function. 2219 // It contains four different overload formats specified in Table 6.13.17.1. 
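// Illustrative sketch (hypothetical names; only the simplest, four-argument
// form is shown): a call like
//   enqueue_kernel(q, flags, ndrange, block)
// is expected to lower to
//   call i32 @__enqueue_kernel_basic(<queue type> %q, i32 %flags,
//                                    <ndrange type> %nd, i8* <block cast>)
// while the forms taking an event wait list and/or variadic local-size
// arguments map to __enqueue_kernel_basic_events, __enqueue_kernel_vaargs,
// or __enqueue_kernel_events_vaargs as handled in the case below.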
2220 case Builtin::BIenqueue_kernel: { 2221 StringRef Name; // Generated function call name 2222 unsigned NumArgs = E->getNumArgs(); 2223 2224 llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy); 2225 llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy); 2226 2227 llvm::Value *Queue = EmitScalarExpr(E->getArg(0)); 2228 llvm::Value *Flags = EmitScalarExpr(E->getArg(1)); 2229 llvm::Value *Range = EmitScalarExpr(E->getArg(2)); 2230 2231 if (NumArgs == 4) { 2232 // The most basic form of the call with parameters: 2233 // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void) 2234 Name = "__enqueue_kernel_basic"; 2235 llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy}; 2236 llvm::FunctionType *FTy = llvm::FunctionType::get( 2237 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false); 2238 2239 llvm::Value *Block = 2240 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2241 2242 return RValue::get(Builder.CreateCall( 2243 CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block})); 2244 } 2245 assert(NumArgs >= 5 && "Invalid enqueue_kernel signature"); 2246 2247 // Could have events and/or vaargs. 2248 if (E->getArg(3)->getType()->isBlockPointerType()) { 2249 // No events passed, but has variadic arguments. 2250 Name = "__enqueue_kernel_vaargs"; 2251 llvm::Value *Block = 2252 Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy); 2253 // Create a vector of the arguments, as well as a constant value to 2254 // express to the runtime the number of variadic arguments. 2255 std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block, 2256 ConstantInt::get(IntTy, NumArgs - 4)}; 2257 std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy, 2258 IntTy}; 2259 2260 // Add the variadics. 2261 for (unsigned I = 4; I < NumArgs; ++I) { 2262 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2263 unsigned TypeSizeInBytes = 2264 getContext() 2265 .getTypeSizeInChars(E->getArg(I)->getType()) 2266 .getQuantity(); 2267 Args.push_back(TypeSizeInBytes < 4 2268 ? Builder.CreateZExt(ArgSize, Int32Ty) 2269 : ArgSize); 2270 } 2271 2272 llvm::FunctionType *FTy = llvm::FunctionType::get( 2273 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2274 return RValue::get( 2275 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2276 llvm::ArrayRef<llvm::Value *>(Args))); 2277 } 2278 // Any calls now have event arguments passed. 2279 if (NumArgs >= 7) { 2280 llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy); 2281 unsigned AS4 = 2282 E->getArg(4)->getType()->isArrayType() 2283 ? E->getArg(4)->getType().getAddressSpace() 2284 : E->getArg(4)->getType()->getPointeeType().getAddressSpace(); 2285 llvm::Type *EventPtrAS4Ty = 2286 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4)); 2287 unsigned AS5 = 2288 E->getArg(5)->getType()->getPointeeType().getAddressSpace(); 2289 llvm::Type *EventPtrAS5Ty = 2290 EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5)); 2291 2292 llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3)); 2293 llvm::Value *EventList = 2294 E->getArg(4)->getType()->isArrayType() 2295 ? 
EmitArrayToPointerDecay(E->getArg(4)).getPointer() 2296 : EmitScalarExpr(E->getArg(4)); 2297 llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5)); 2298 llvm::Value *Block = 2299 Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy); 2300 2301 std::vector<llvm::Type *> ArgTys = { 2302 QueueTy, Int32Ty, RangeTy, Int32Ty, 2303 EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy}; 2304 std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents, 2305 EventList, ClkEvent, Block}; 2306 2307 if (NumArgs == 7) { 2308 // Has events but no variadics. 2309 Name = "__enqueue_kernel_basic_events"; 2310 llvm::FunctionType *FTy = llvm::FunctionType::get( 2311 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false); 2312 return RValue::get( 2313 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2314 llvm::ArrayRef<llvm::Value *>(Args))); 2315 } 2316 // Has event info and variadics 2317 // Pass the number of variadics to the runtime function too. 2318 Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7)); 2319 ArgTys.push_back(Int32Ty); 2320 Name = "__enqueue_kernel_events_vaargs"; 2321 2322 // Add the variadics. 2323 for (unsigned I = 7; I < NumArgs; ++I) { 2324 llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I)); 2325 unsigned TypeSizeInBytes = 2326 getContext() 2327 .getTypeSizeInChars(E->getArg(I)->getType()) 2328 .getQuantity(); 2329 Args.push_back(TypeSizeInBytes < 4 2330 ? Builder.CreateZExt(ArgSize, Int32Ty) 2331 : ArgSize); 2332 } 2333 llvm::FunctionType *FTy = llvm::FunctionType::get( 2334 Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true); 2335 return RValue::get( 2336 Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), 2337 llvm::ArrayRef<llvm::Value *>(Args))); 2338 } 2339 } 2340 // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block 2341 // parameter. 2342 case Builtin::BIget_kernel_work_group_size: { 2343 Value *Arg = EmitScalarExpr(E->getArg(0)); 2344 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2345 return RValue::get( 2346 Builder.CreateCall(CGM.CreateRuntimeFunction( 2347 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2348 "__get_kernel_work_group_size_impl"), 2349 Arg)); 2350 } 2351 case Builtin::BIget_kernel_preferred_work_group_size_multiple: { 2352 Value *Arg = EmitScalarExpr(E->getArg(0)); 2353 Arg = Builder.CreateBitCast(Arg, Int8PtrTy); 2354 return RValue::get(Builder.CreateCall( 2355 CGM.CreateRuntimeFunction( 2356 llvm::FunctionType::get(IntTy, Int8PtrTy, false), 2357 "__get_kernel_preferred_work_group_multiple_impl"), 2358 Arg)); 2359 } 2360 case Builtin::BIprintf: 2361 if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) 2362 return EmitCUDADevicePrintfCallExpr(E, ReturnValue); 2363 break; 2364 case Builtin::BI__builtin_canonicalize: 2365 case Builtin::BI__builtin_canonicalizef: 2366 case Builtin::BI__builtin_canonicalizel: 2367 return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize)); 2368 2369 case Builtin::BI__builtin_thread_pointer: { 2370 if (!getContext().getTargetInfo().isTLSSupported()) 2371 CGM.ErrorUnsupported(E, "__builtin_thread_pointer"); 2372 // Fall through - it's already mapped to the intrinsic by GCCBuiltin. 2373 break; 2374 } 2375 } 2376 2377 // If this is an alias for a lib function (e.g. __builtin_sin), emit 2378 // the call using the normal call path, but using the unmangled 2379 // version of the function name. 
2380 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 2381 return emitLibraryCall(*this, FD, E, 2382 CGM.getBuiltinLibFunction(FD, BuiltinID)); 2383 2384 // If this is a predefined lib function (e.g. malloc), emit the call 2385 // using exactly the normal call path. 2386 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) 2387 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee())); 2388 2389 // Check that a call to a target-specific builtin has the correct target 2390 // features. 2391 // This is placed down here to avoid checking non-target-specific builtins; however, if 2392 // generic builtins start to require generic target features, then we 2393 // can move this up to the beginning of the function. 2394 checkTargetFeatures(E, FD); 2395 2396 // See if we have a target-specific intrinsic. 2397 const char *Name = getContext().BuiltinInfo.getName(BuiltinID); 2398 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; 2399 if (const char *Prefix = 2400 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) { 2401 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name); 2402 // NOTE: we don't need to perform a compatibility flag check here since the 2403 // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters the 2404 // MS builtins via ALL_MS_LANGUAGES, so they are filtered out earlier. 2405 if (IntrinsicID == Intrinsic::not_intrinsic) 2406 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name); 2407 } 2408 2409 if (IntrinsicID != Intrinsic::not_intrinsic) { 2410 SmallVector<Value*, 16> Args; 2411 2412 // Find out if any arguments are required to be integer constant 2413 // expressions. 2414 unsigned ICEArguments = 0; 2415 ASTContext::GetBuiltinTypeError Error; 2416 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 2417 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 2418 2419 Function *F = CGM.getIntrinsic(IntrinsicID); 2420 llvm::FunctionType *FTy = F->getFunctionType(); 2421 2422 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) { 2423 Value *ArgValue; 2424 // If this is a normal argument, just emit it as a scalar. 2425 if ((ICEArguments & (1 << i)) == 0) { 2426 ArgValue = EmitScalarExpr(E->getArg(i)); 2427 } else { 2428 // If this is required to be a constant, constant fold it so that we 2429 // know that the generated intrinsic gets a ConstantInt. 2430 llvm::APSInt Result; 2431 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 2432 assert(IsConst && "Constant arg isn't actually constant?"); 2433 (void)IsConst; 2434 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); 2435 } 2436 2437 // If the intrinsic arg type is different from the builtin arg type, 2438 // we need to do a bitcast.
2439 llvm::Type *PTy = FTy->getParamType(i); 2440 if (PTy != ArgValue->getType()) { 2441 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) && 2442 "Must be able to losslessly bit cast to param"); 2443 ArgValue = Builder.CreateBitCast(ArgValue, PTy); 2444 } 2445 2446 Args.push_back(ArgValue); 2447 } 2448 2449 Value *V = Builder.CreateCall(F, Args); 2450 QualType BuiltinRetType = E->getType(); 2451 2452 llvm::Type *RetTy = VoidTy; 2453 if (!BuiltinRetType->isVoidType()) 2454 RetTy = ConvertType(BuiltinRetType); 2455 2456 if (RetTy != V->getType()) { 2457 assert(V->getType()->canLosslesslyBitCastTo(RetTy) && 2458 "Must be able to losslessly bit cast result type"); 2459 V = Builder.CreateBitCast(V, RetTy); 2460 } 2461 2462 return RValue::get(V); 2463 } 2464 2465 // See if we have a target specific builtin that needs to be lowered. 2466 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E)) 2467 return RValue::get(V); 2468 2469 ErrorUnsupported(E, "builtin function"); 2470 2471 // Unknown builtin, for now just dump it out and return undef. 2472 return GetUndefRValue(E->getType()); 2473 } 2474 2475 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, 2476 unsigned BuiltinID, const CallExpr *E, 2477 llvm::Triple::ArchType Arch) { 2478 switch (Arch) { 2479 case llvm::Triple::arm: 2480 case llvm::Triple::armeb: 2481 case llvm::Triple::thumb: 2482 case llvm::Triple::thumbeb: 2483 return CGF->EmitARMBuiltinExpr(BuiltinID, E); 2484 case llvm::Triple::aarch64: 2485 case llvm::Triple::aarch64_be: 2486 return CGF->EmitAArch64BuiltinExpr(BuiltinID, E); 2487 case llvm::Triple::x86: 2488 case llvm::Triple::x86_64: 2489 return CGF->EmitX86BuiltinExpr(BuiltinID, E); 2490 case llvm::Triple::ppc: 2491 case llvm::Triple::ppc64: 2492 case llvm::Triple::ppc64le: 2493 return CGF->EmitPPCBuiltinExpr(BuiltinID, E); 2494 case llvm::Triple::r600: 2495 case llvm::Triple::amdgcn: 2496 return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E); 2497 case llvm::Triple::systemz: 2498 return CGF->EmitSystemZBuiltinExpr(BuiltinID, E); 2499 case llvm::Triple::nvptx: 2500 case llvm::Triple::nvptx64: 2501 return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E); 2502 case llvm::Triple::wasm32: 2503 case llvm::Triple::wasm64: 2504 return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E); 2505 default: 2506 return nullptr; 2507 } 2508 } 2509 2510 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID, 2511 const CallExpr *E) { 2512 if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) { 2513 assert(getContext().getAuxTargetInfo() && "Missing aux target info"); 2514 return EmitTargetArchBuiltinExpr( 2515 this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E, 2516 getContext().getAuxTargetInfo()->getTriple().getArch()); 2517 } 2518 2519 return EmitTargetArchBuiltinExpr(this, BuiltinID, E, 2520 getTarget().getTriple().getArch()); 2521 } 2522 2523 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF, 2524 NeonTypeFlags TypeFlags, 2525 bool V1Ty=false) { 2526 int IsQuad = TypeFlags.isQuad(); 2527 switch (TypeFlags.getEltType()) { 2528 case NeonTypeFlags::Int8: 2529 case NeonTypeFlags::Poly8: 2530 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); 2531 case NeonTypeFlags::Int16: 2532 case NeonTypeFlags::Poly16: 2533 case NeonTypeFlags::Float16: 2534 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad)); 2535 case NeonTypeFlags::Int32: 2536 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 
1 : (2 << IsQuad)); 2537 case NeonTypeFlags::Int64: 2538 case NeonTypeFlags::Poly64: 2539 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad)); 2540 case NeonTypeFlags::Poly128: 2541 // FIXME: i128 and f128 don't get full support in Clang and LLVM; 2542 // a lot of the i128 and f128 API is still missing, 2543 // so we use v16i8 to represent poly128 and get it pattern matched. 2544 return llvm::VectorType::get(CGF->Int8Ty, 16); 2545 case NeonTypeFlags::Float32: 2546 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad)); 2547 case NeonTypeFlags::Float64: 2548 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad)); 2549 } 2550 llvm_unreachable("Unknown vector element type!"); 2551 } 2552 2553 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF, 2554 NeonTypeFlags IntTypeFlags) { 2555 int IsQuad = IntTypeFlags.isQuad(); 2556 switch (IntTypeFlags.getEltType()) { 2557 case NeonTypeFlags::Int32: 2558 return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad)); 2559 case NeonTypeFlags::Int64: 2560 return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad)); 2561 default: 2562 llvm_unreachable("Type can't be converted to floating-point!"); 2563 } 2564 } 2565 2566 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) { 2567 unsigned nElts = V->getType()->getVectorNumElements(); 2568 Value* SV = llvm::ConstantVector::getSplat(nElts, C); 2569 return Builder.CreateShuffleVector(V, V, SV, "lane"); 2570 } 2571 2572 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops, 2573 const char *name, 2574 unsigned shift, bool rightshift) { 2575 unsigned j = 0; 2576 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 2577 ai != ae; ++ai, ++j) 2578 if (shift > 0 && shift == j) 2579 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift); 2580 else 2581 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name); 2582 2583 return Builder.CreateCall(F, Ops, name); 2584 } 2585 2586 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty, 2587 bool neg) { 2588 int SV = cast<ConstantInt>(V)->getSExtValue(); 2589 return ConstantInt::get(Ty, neg ? -SV : SV); 2590 } 2591 2592 /// \brief Right-shift a vector by a constant. 2593 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift, 2594 llvm::Type *Ty, bool usgn, 2595 const char *name) { 2596 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 2597 2598 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue(); 2599 int EltSize = VTy->getScalarSizeInBits(); 2600 2601 Vec = Builder.CreateBitCast(Vec, Ty); 2602 2603 // lshr/ashr are undefined when the shift amount is equal to the vector 2604 // element size. 2605 if (ShiftAmt == EltSize) { 2606 if (usgn) { 2607 // Right-shifting an unsigned value by its size yields 0. 2608 return llvm::ConstantAggregateZero::get(VTy); 2609 } else { 2610 // Right-shifting a signed value by its size is equivalent 2611 // to a shift of size-1.
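// Worked example (illustrative, assuming <8 x i8> elements): a signed right
// shift by 8 is emitted below as an ashr by 7, which still replicates the
// sign bit across the whole element, while the unsigned case above has
// already returned the all-zero vector.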
2612 --ShiftAmt; 2613 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt); 2614 } 2615 } 2616 2617 Shift = EmitNeonShiftVector(Shift, Ty, false); 2618 if (usgn) 2619 return Builder.CreateLShr(Vec, Shift, name); 2620 else 2621 return Builder.CreateAShr(Vec, Shift, name); 2622 } 2623 2624 enum { 2625 AddRetType = (1 << 0), 2626 Add1ArgType = (1 << 1), 2627 Add2ArgTypes = (1 << 2), 2628 2629 VectorizeRetType = (1 << 3), 2630 VectorizeArgTypes = (1 << 4), 2631 2632 InventFloatType = (1 << 5), 2633 UnsignedAlts = (1 << 6), 2634 2635 Use64BitVectors = (1 << 7), 2636 Use128BitVectors = (1 << 8), 2637 2638 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 2639 VectorRet = AddRetType | VectorizeRetType, 2640 VectorRetGetArgs01 = 2641 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 2642 FpCmpzModifiers = 2643 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 2644 }; 2645 2646 namespace { 2647 struct NeonIntrinsicInfo { 2648 const char *NameHint; 2649 unsigned BuiltinID; 2650 unsigned LLVMIntrinsic; 2651 unsigned AltLLVMIntrinsic; 2652 unsigned TypeModifier; 2653 2654 bool operator<(unsigned RHSBuiltinID) const { 2655 return BuiltinID < RHSBuiltinID; 2656 } 2657 bool operator<(const NeonIntrinsicInfo &TE) const { 2658 return BuiltinID < TE.BuiltinID; 2659 } 2660 }; 2661 } // end anonymous namespace 2662 2663 #define NEONMAP0(NameBase) \ 2664 { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 } 2665 2666 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 2667 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2668 Intrinsic::LLVMIntrinsic, 0, TypeModifier } 2669 2670 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 2671 { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \ 2672 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 2673 TypeModifier } 2674 2675 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 2676 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2677 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2678 NEONMAP1(vabs_v, arm_neon_vabs, 0), 2679 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 2680 NEONMAP0(vaddhn_v), 2681 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 2682 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 2683 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 2684 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 2685 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 2686 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 2687 NEONMAP1(vcage_v, arm_neon_vacge, 0), 2688 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 2689 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 2690 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 2691 NEONMAP1(vcale_v, arm_neon_vacge, 0), 2692 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 2693 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 2694 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 2695 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 2696 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 2697 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2698 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2699 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2700 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2701 NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0), 2702 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 2703 NEONMAP0(vcvt_f32_v), 2704 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2705 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2706 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2707 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2708 NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2709 NEONMAP0(vcvt_s32_v), 2710 
NEONMAP0(vcvt_s64_v), 2711 NEONMAP0(vcvt_u32_v), 2712 NEONMAP0(vcvt_u64_v), 2713 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 2714 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 2715 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 2716 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 2717 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 2718 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 2719 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 2720 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 2721 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 2722 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 2723 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 2724 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 2725 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 2726 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 2727 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 2728 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 2729 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 2730 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 2731 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 2732 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 2733 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 2734 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 2735 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 2736 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 2737 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 2738 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 2739 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 2740 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 2741 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 2742 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 2743 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 2744 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 2745 NEONMAP0(vcvtq_f32_v), 2746 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2747 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2748 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2749 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2750 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2751 NEONMAP0(vcvtq_s32_v), 2752 NEONMAP0(vcvtq_s64_v), 2753 NEONMAP0(vcvtq_u32_v), 2754 NEONMAP0(vcvtq_u64_v), 2755 NEONMAP0(vext_v), 2756 NEONMAP0(vextq_v), 2757 NEONMAP0(vfma_v), 2758 NEONMAP0(vfmaq_v), 2759 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2760 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2761 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2762 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2763 NEONMAP0(vld1_dup_v), 2764 NEONMAP1(vld1_v, arm_neon_vld1, 0), 2765 NEONMAP0(vld1q_dup_v), 2766 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 2767 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 2768 NEONMAP1(vld2_v, arm_neon_vld2, 0), 2769 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 2770 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 2771 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 2772 NEONMAP1(vld3_v, arm_neon_vld3, 0), 2773 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 2774 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 2775 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 2776 NEONMAP1(vld4_v, arm_neon_vld4, 0), 2777 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 2778 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 2779 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2780 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 2781 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 2782 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2783 NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2784 
NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 2785 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 2786 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2787 NEONMAP0(vmovl_v), 2788 NEONMAP0(vmovn_v), 2789 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 2790 NEONMAP0(vmull_v), 2791 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 2792 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2793 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2794 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 2795 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2796 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2797 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 2798 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 2799 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 2800 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 2801 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 2802 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2803 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2804 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 2805 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 2806 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 2807 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 2808 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 2809 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 2810 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 2811 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 2812 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 2813 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 2814 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 2815 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2816 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2817 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2818 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2819 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2820 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2821 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 2822 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 2823 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2824 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2825 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 2826 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2827 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2828 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 2829 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 2830 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2831 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2832 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 2833 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 2834 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 2835 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 2836 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 2837 NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType), 2838 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 
2839 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 2840 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 2841 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 2842 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 2843 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 2844 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2845 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2846 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2847 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2848 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2849 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2850 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 2851 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 2852 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 2853 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 2854 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 2855 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 2856 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 2857 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 2858 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 2859 NEONMAP0(vshl_n_v), 2860 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2861 NEONMAP0(vshll_n_v), 2862 NEONMAP0(vshlq_n_v), 2863 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2864 NEONMAP0(vshr_n_v), 2865 NEONMAP0(vshrn_n_v), 2866 NEONMAP0(vshrq_n_v), 2867 NEONMAP1(vst1_v, arm_neon_vst1, 0), 2868 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 2869 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 2870 NEONMAP1(vst2_v, arm_neon_vst2, 0), 2871 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 2872 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 2873 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 2874 NEONMAP1(vst3_v, arm_neon_vst3, 0), 2875 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 2876 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 2877 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 2878 NEONMAP1(vst4_v, arm_neon_vst4, 0), 2879 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 2880 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 2881 NEONMAP0(vsubhn_v), 2882 NEONMAP0(vtrn_v), 2883 NEONMAP0(vtrnq_v), 2884 NEONMAP0(vtst_v), 2885 NEONMAP0(vtstq_v), 2886 NEONMAP0(vuzp_v), 2887 NEONMAP0(vuzpq_v), 2888 NEONMAP0(vzip_v), 2889 NEONMAP0(vzipq_v) 2890 }; 2891 2892 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 2893 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 2894 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 2895 NEONMAP0(vaddhn_v), 2896 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 2897 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 2898 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 2899 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 2900 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 2901 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 2902 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 2903 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 2904 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 2905 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 2906 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 2907 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 2908 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 2909 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 2910 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2911 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2912 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2913 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2914 NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0), 2915 NEONMAP1(vcvt_f32_f16, 
aarch64_neon_vcvthf2fp, 0), 2916 NEONMAP0(vcvt_f32_v), 2917 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2918 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2919 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2920 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2921 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2922 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2923 NEONMAP0(vcvtq_f32_v), 2924 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2925 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2926 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2927 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2928 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2929 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2930 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 2931 NEONMAP0(vext_v), 2932 NEONMAP0(vextq_v), 2933 NEONMAP0(vfma_v), 2934 NEONMAP0(vfmaq_v), 2935 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2936 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2937 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2938 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2939 NEONMAP0(vmovl_v), 2940 NEONMAP0(vmovn_v), 2941 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 2942 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 2943 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 2944 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2945 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2946 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 2947 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 2948 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 2949 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2950 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2951 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 2952 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 2953 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 2954 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 2955 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 2956 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 2957 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 2958 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 2959 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 2960 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 2961 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 2962 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2963 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2964 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 2965 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2966 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 2967 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2968 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 2969 NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0), 2970 NEONMAP2(vqsub_v, aarch64_neon_uqsub, 
aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2971 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2972 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 2973 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2974 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2975 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 2976 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 2977 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2978 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2979 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2980 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2981 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2982 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2983 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2984 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2985 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 2986 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 2987 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 2988 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 2989 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 2990 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 2991 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 2992 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 2993 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 2994 NEONMAP0(vshl_n_v), 2995 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2996 NEONMAP0(vshll_n_v), 2997 NEONMAP0(vshlq_n_v), 2998 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2999 NEONMAP0(vshr_n_v), 3000 NEONMAP0(vshrn_n_v), 3001 NEONMAP0(vshrq_n_v), 3002 NEONMAP0(vsubhn_v), 3003 NEONMAP0(vtst_v), 3004 NEONMAP0(vtstq_v), 3005 }; 3006 3007 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 3008 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 3009 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 3010 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 3011 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3012 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3013 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 3014 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 3015 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3016 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3017 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3018 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 3019 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 3020 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 3021 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 3022 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3023 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3024 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3025 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3026 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3027 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType 
| Add1ArgType), 3028 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 3029 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 3030 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 3031 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 3032 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3033 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3034 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 3035 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 3036 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3037 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3038 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3039 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3040 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3041 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3042 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 3043 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 3044 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3045 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3046 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 3047 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 3048 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3049 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3050 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 3051 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 3052 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 3053 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 3054 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 3055 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 3056 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 3057 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3058 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3059 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3060 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3061 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3062 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3063 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3064 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3065 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 3066 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 3067 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3068 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3069 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3070 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3071 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3072 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3073 NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3074 
NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3075 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 3076 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 3077 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 3078 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 3079 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 3080 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3081 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 3082 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3083 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 3084 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3085 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 3086 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3087 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 3088 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 3089 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 3090 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3091 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 3092 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 3093 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 3094 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3095 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3096 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 3097 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 3098 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 3099 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 3100 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 3101 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 3102 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 3103 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 3104 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 3105 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 3106 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 3107 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 3108 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3109 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3110 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 3111 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 3112 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 3113 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3114 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 3115 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3116 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 3117 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 3118 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 3119 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 3120 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 3121 NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3122 
NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3123 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 3124 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 3125 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 3126 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 3127 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 3128 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 3129 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 3130 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 3131 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3132 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 3133 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 3134 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 3135 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 3136 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3137 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 3138 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3139 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3140 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3141 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3142 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 3143 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 3144 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3145 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3146 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 3147 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 3148 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 3149 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 3150 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 3151 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 3152 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3153 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 3154 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 3155 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 3156 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 3157 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3158 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3159 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 3160 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 3161 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 3162 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3163 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 3164 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3165 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3166 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 3167 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 3168 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 3169 NEONMAP1(vqsubh_u16, 
aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 3170 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 3171 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 3172 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 3173 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 3174 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 3175 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 3176 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 3177 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 3178 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 3179 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 3180 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 3181 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 3182 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 3183 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 3184 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 3185 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 3186 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 3187 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 3188 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 3189 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 3190 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3191 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 3192 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 3193 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 3194 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 3195 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 3196 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3197 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 3198 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 3199 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 3200 }; 3201 3202 #undef NEONMAP0 3203 #undef NEONMAP1 3204 #undef NEONMAP2 3205 3206 static bool NEONSIMDIntrinsicsProvenSorted = false; 3207 3208 static bool AArch64SIMDIntrinsicsProvenSorted = false; 3209 static bool AArch64SISDIntrinsicsProvenSorted = false; 3210 3211 3212 static const NeonIntrinsicInfo * 3213 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 3214 unsigned BuiltinID, bool &MapProvenSorted) { 3215 3216 #ifndef NDEBUG 3217 if (!MapProvenSorted) { 3218 assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap))); 3219 MapProvenSorted = true; 3220 } 3221 #endif 3222 3223 const NeonIntrinsicInfo *Builtin = 3224 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 3225 3226 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 3227 return Builtin; 3228 3229 return nullptr; 3230 } 3231 3232 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 3233 unsigned Modifier, 3234 llvm::Type *ArgType, 3235 const CallExpr *E) { 3236 int VectorSize = 0; 3237 if (Modifier & Use64BitVectors) 3238 VectorSize = 64; 3239 else if (Modifier & Use128BitVectors) 3240 VectorSize = 128; 3241 3242 // Return type. 3243 SmallVector<llvm::Type *, 3> Tys; 3244 if (Modifier & AddRetType) { 3245 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 3246 if (Modifier & VectorizeRetType) 3247 Ty = llvm::VectorType::get( 3248 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1); 3249 3250 Tys.push_back(Ty); 3251 } 3252 3253 // Arguments. 
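  // When the VectorizeArgTypes modifier is set, promote the scalar argument
  // type to a vector: one element by default, or enough elements to fill a
  // 64/128-bit vector when Use64BitVectors/Use128BitVectors was requested
  // (e.g. an i8 argument with Use64BitVectors becomes <8 x i8>).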
3254 if (Modifier & VectorizeArgTypes) { 3255 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1; 3256 ArgType = llvm::VectorType::get(ArgType, Elts); 3257 } 3258 3259 if (Modifier & (Add1ArgType | Add2ArgTypes)) 3260 Tys.push_back(ArgType); 3261 3262 if (Modifier & Add2ArgTypes) 3263 Tys.push_back(ArgType); 3264 3265 if (Modifier & InventFloatType) 3266 Tys.push_back(FloatTy); 3267 3268 return CGM.getIntrinsic(IntrinsicID, Tys); 3269 } 3270 3271 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, 3272 const NeonIntrinsicInfo &SISDInfo, 3273 SmallVectorImpl<Value *> &Ops, 3274 const CallExpr *E) { 3275 unsigned BuiltinID = SISDInfo.BuiltinID; 3276 unsigned int Int = SISDInfo.LLVMIntrinsic; 3277 unsigned Modifier = SISDInfo.TypeModifier; 3278 const char *s = SISDInfo.NameHint; 3279 3280 switch (BuiltinID) { 3281 case NEON::BI__builtin_neon_vcled_s64: 3282 case NEON::BI__builtin_neon_vcled_u64: 3283 case NEON::BI__builtin_neon_vcles_f32: 3284 case NEON::BI__builtin_neon_vcled_f64: 3285 case NEON::BI__builtin_neon_vcltd_s64: 3286 case NEON::BI__builtin_neon_vcltd_u64: 3287 case NEON::BI__builtin_neon_vclts_f32: 3288 case NEON::BI__builtin_neon_vcltd_f64: 3289 case NEON::BI__builtin_neon_vcales_f32: 3290 case NEON::BI__builtin_neon_vcaled_f64: 3291 case NEON::BI__builtin_neon_vcalts_f32: 3292 case NEON::BI__builtin_neon_vcaltd_f64: 3293 // Only one direction of comparisons actually exist, cmle is actually a cmge 3294 // with swapped operands. The table gives us the right intrinsic but we 3295 // still need to do the swap. 3296 std::swap(Ops[0], Ops[1]); 3297 break; 3298 } 3299 3300 assert(Int && "Generic code assumes a valid intrinsic"); 3301 3302 // Determine the type(s) of this overloaded AArch64 intrinsic. 3303 const Expr *Arg = E->getArg(0); 3304 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType()); 3305 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E); 3306 3307 int j = 0; 3308 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0); 3309 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); 3310 ai != ae; ++ai, ++j) { 3311 llvm::Type *ArgTy = ai->getType(); 3312 if (Ops[j]->getType()->getPrimitiveSizeInBits() == 3313 ArgTy->getPrimitiveSizeInBits()) 3314 continue; 3315 3316 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy()); 3317 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate 3318 // it before inserting. 3319 Ops[j] = 3320 CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType()); 3321 Ops[j] = 3322 CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0); 3323 } 3324 3325 Value *Result = CGF.EmitNeonCall(F, Ops, s); 3326 llvm::Type *ResultType = CGF.ConvertType(E->getType()); 3327 if (ResultType->getPrimitiveSizeInBits() < 3328 Result->getType()->getPrimitiveSizeInBits()) 3329 return CGF.Builder.CreateExtractElement(Result, C0); 3330 3331 return CGF.Builder.CreateBitCast(Result, ResultType, s); 3332 } 3333 3334 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( 3335 unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, 3336 const char *NameHint, unsigned Modifier, const CallExpr *E, 3337 SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) { 3338 // Get the last argument, which specifies the vector type. 
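  // The constant is an encoded NeonTypeFlags value describing the element
  // type, its signedness, and whether the operation uses a quad (128-bit)
  // vector.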
3339 llvm::APSInt NeonTypeConst; 3340 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 3341 if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) 3342 return nullptr; 3343 3344 // Determine the type of this overloaded NEON intrinsic. 3345 NeonTypeFlags Type(NeonTypeConst.getZExtValue()); 3346 bool Usgn = Type.isUnsigned(); 3347 bool Quad = Type.isQuad(); 3348 3349 llvm::VectorType *VTy = GetNeonType(this, Type); 3350 llvm::Type *Ty = VTy; 3351 if (!Ty) 3352 return nullptr; 3353 3354 auto getAlignmentValue32 = [&](Address addr) -> Value* { 3355 return Builder.getInt32(addr.getAlignment().getQuantity()); 3356 }; 3357 3358 unsigned Int = LLVMIntrinsic; 3359 if ((Modifier & UnsignedAlts) && !Usgn) 3360 Int = AltLLVMIntrinsic; 3361 3362 switch (BuiltinID) { 3363 default: break; 3364 case NEON::BI__builtin_neon_vabs_v: 3365 case NEON::BI__builtin_neon_vabsq_v: 3366 if (VTy->getElementType()->isFloatingPointTy()) 3367 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs"); 3368 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs"); 3369 case NEON::BI__builtin_neon_vaddhn_v: { 3370 llvm::VectorType *SrcTy = 3371 llvm::VectorType::getExtendedElementVectorType(VTy); 3372 3373 // %sum = add <4 x i32> %lhs, %rhs 3374 Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy); 3375 Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy); 3376 Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn"); 3377 3378 // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16> 3379 Constant *ShiftAmt = 3380 ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2); 3381 Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn"); 3382 3383 // %res = trunc <4 x i32> %high to <4 x i16> 3384 return Builder.CreateTrunc(Ops[0], VTy, "vaddhn"); 3385 } 3386 case NEON::BI__builtin_neon_vcale_v: 3387 case NEON::BI__builtin_neon_vcaleq_v: 3388 case NEON::BI__builtin_neon_vcalt_v: 3389 case NEON::BI__builtin_neon_vcaltq_v: 3390 std::swap(Ops[0], Ops[1]); 3391 case NEON::BI__builtin_neon_vcage_v: 3392 case NEON::BI__builtin_neon_vcageq_v: 3393 case NEON::BI__builtin_neon_vcagt_v: 3394 case NEON::BI__builtin_neon_vcagtq_v: { 3395 llvm::Type *VecFlt = llvm::VectorType::get( 3396 VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy, 3397 VTy->getNumElements()); 3398 llvm::Type *Tys[] = { VTy, VecFlt }; 3399 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3400 return EmitNeonCall(F, Ops, NameHint); 3401 } 3402 case NEON::BI__builtin_neon_vclz_v: 3403 case NEON::BI__builtin_neon_vclzq_v: 3404 // We generate target-independent intrinsic, which needs a second argument 3405 // for whether or not clz of zero is undefined; on ARM it isn't. 3406 Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef())); 3407 break; 3408 case NEON::BI__builtin_neon_vcvt_f32_v: 3409 case NEON::BI__builtin_neon_vcvtq_f32_v: 3410 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3411 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad)); 3412 return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 3413 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 3414 case NEON::BI__builtin_neon_vcvt_n_f32_v: 3415 case NEON::BI__builtin_neon_vcvt_n_f64_v: 3416 case NEON::BI__builtin_neon_vcvtq_n_f32_v: 3417 case NEON::BI__builtin_neon_vcvtq_n_f64_v: { 3418 llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty }; 3419 Int = Usgn ? 
LLVMIntrinsic : AltLLVMIntrinsic; 3420 Function *F = CGM.getIntrinsic(Int, Tys); 3421 return EmitNeonCall(F, Ops, "vcvt_n"); 3422 } 3423 case NEON::BI__builtin_neon_vcvt_n_s32_v: 3424 case NEON::BI__builtin_neon_vcvt_n_u32_v: 3425 case NEON::BI__builtin_neon_vcvt_n_s64_v: 3426 case NEON::BI__builtin_neon_vcvt_n_u64_v: 3427 case NEON::BI__builtin_neon_vcvtq_n_s32_v: 3428 case NEON::BI__builtin_neon_vcvtq_n_u32_v: 3429 case NEON::BI__builtin_neon_vcvtq_n_s64_v: 3430 case NEON::BI__builtin_neon_vcvtq_n_u64_v: { 3431 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3432 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3433 return EmitNeonCall(F, Ops, "vcvt_n"); 3434 } 3435 case NEON::BI__builtin_neon_vcvt_s32_v: 3436 case NEON::BI__builtin_neon_vcvt_u32_v: 3437 case NEON::BI__builtin_neon_vcvt_s64_v: 3438 case NEON::BI__builtin_neon_vcvt_u64_v: 3439 case NEON::BI__builtin_neon_vcvtq_s32_v: 3440 case NEON::BI__builtin_neon_vcvtq_u32_v: 3441 case NEON::BI__builtin_neon_vcvtq_s64_v: 3442 case NEON::BI__builtin_neon_vcvtq_u64_v: { 3443 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 3444 return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 3445 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt"); 3446 } 3447 case NEON::BI__builtin_neon_vcvta_s32_v: 3448 case NEON::BI__builtin_neon_vcvta_s64_v: 3449 case NEON::BI__builtin_neon_vcvta_u32_v: 3450 case NEON::BI__builtin_neon_vcvta_u64_v: 3451 case NEON::BI__builtin_neon_vcvtaq_s32_v: 3452 case NEON::BI__builtin_neon_vcvtaq_s64_v: 3453 case NEON::BI__builtin_neon_vcvtaq_u32_v: 3454 case NEON::BI__builtin_neon_vcvtaq_u64_v: 3455 case NEON::BI__builtin_neon_vcvtn_s32_v: 3456 case NEON::BI__builtin_neon_vcvtn_s64_v: 3457 case NEON::BI__builtin_neon_vcvtn_u32_v: 3458 case NEON::BI__builtin_neon_vcvtn_u64_v: 3459 case NEON::BI__builtin_neon_vcvtnq_s32_v: 3460 case NEON::BI__builtin_neon_vcvtnq_s64_v: 3461 case NEON::BI__builtin_neon_vcvtnq_u32_v: 3462 case NEON::BI__builtin_neon_vcvtnq_u64_v: 3463 case NEON::BI__builtin_neon_vcvtp_s32_v: 3464 case NEON::BI__builtin_neon_vcvtp_s64_v: 3465 case NEON::BI__builtin_neon_vcvtp_u32_v: 3466 case NEON::BI__builtin_neon_vcvtp_u64_v: 3467 case NEON::BI__builtin_neon_vcvtpq_s32_v: 3468 case NEON::BI__builtin_neon_vcvtpq_s64_v: 3469 case NEON::BI__builtin_neon_vcvtpq_u32_v: 3470 case NEON::BI__builtin_neon_vcvtpq_u64_v: 3471 case NEON::BI__builtin_neon_vcvtm_s32_v: 3472 case NEON::BI__builtin_neon_vcvtm_s64_v: 3473 case NEON::BI__builtin_neon_vcvtm_u32_v: 3474 case NEON::BI__builtin_neon_vcvtm_u64_v: 3475 case NEON::BI__builtin_neon_vcvtmq_s32_v: 3476 case NEON::BI__builtin_neon_vcvtmq_s64_v: 3477 case NEON::BI__builtin_neon_vcvtmq_u32_v: 3478 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 3479 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 3480 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint); 3481 } 3482 case NEON::BI__builtin_neon_vext_v: 3483 case NEON::BI__builtin_neon_vextq_v: { 3484 int CV = cast<ConstantInt>(Ops[2])->getSExtValue(); 3485 SmallVector<uint32_t, 16> Indices; 3486 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3487 Indices.push_back(i+CV); 3488 3489 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3490 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3491 return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext"); 3492 } 3493 case NEON::BI__builtin_neon_vfma_v: 3494 case NEON::BI__builtin_neon_vfmaq_v: { 3495 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 3496 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3497 Ops[1] = 
Builder.CreateBitCast(Ops[1], Ty); 3498 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3499 3500 // NEON intrinsic puts accumulator first, unlike the LLVM fma. 3501 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 3502 } 3503 case NEON::BI__builtin_neon_vld1_v: 3504 case NEON::BI__builtin_neon_vld1q_v: { 3505 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3506 Ops.push_back(getAlignmentValue32(PtrOp0)); 3507 return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1"); 3508 } 3509 case NEON::BI__builtin_neon_vld2_v: 3510 case NEON::BI__builtin_neon_vld2q_v: 3511 case NEON::BI__builtin_neon_vld3_v: 3512 case NEON::BI__builtin_neon_vld3q_v: 3513 case NEON::BI__builtin_neon_vld4_v: 3514 case NEON::BI__builtin_neon_vld4q_v: { 3515 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3516 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3517 Value *Align = getAlignmentValue32(PtrOp1); 3518 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint); 3519 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3520 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3521 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 3522 } 3523 case NEON::BI__builtin_neon_vld1_dup_v: 3524 case NEON::BI__builtin_neon_vld1q_dup_v: { 3525 Value *V = UndefValue::get(Ty); 3526 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 3527 PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty); 3528 LoadInst *Ld = Builder.CreateLoad(PtrOp0); 3529 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 3530 Ops[0] = Builder.CreateInsertElement(V, Ld, CI); 3531 return EmitNeonSplat(Ops[0], CI); 3532 } 3533 case NEON::BI__builtin_neon_vld2_lane_v: 3534 case NEON::BI__builtin_neon_vld2q_lane_v: 3535 case NEON::BI__builtin_neon_vld3_lane_v: 3536 case NEON::BI__builtin_neon_vld3q_lane_v: 3537 case NEON::BI__builtin_neon_vld4_lane_v: 3538 case NEON::BI__builtin_neon_vld4q_lane_v: { 3539 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 3540 Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys); 3541 for (unsigned I = 2; I < Ops.size() - 1; ++I) 3542 Ops[I] = Builder.CreateBitCast(Ops[I], Ty); 3543 Ops.push_back(getAlignmentValue32(PtrOp1)); 3544 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint); 3545 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 3546 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3547 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 3548 } 3549 case NEON::BI__builtin_neon_vmovl_v: { 3550 llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy); 3551 Ops[0] = Builder.CreateBitCast(Ops[0], DTy); 3552 if (Usgn) 3553 return Builder.CreateZExt(Ops[0], Ty, "vmovl"); 3554 return Builder.CreateSExt(Ops[0], Ty, "vmovl"); 3555 } 3556 case NEON::BI__builtin_neon_vmovn_v: { 3557 llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy); 3558 Ops[0] = Builder.CreateBitCast(Ops[0], QTy); 3559 return Builder.CreateTrunc(Ops[0], Ty, "vmovn"); 3560 } 3561 case NEON::BI__builtin_neon_vmull_v: 3562 // FIXME: the integer vmull operations could be emitted in terms of pure 3563 // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of 3564 // hoisting the exts outside loops. Until global ISel comes along that can 3565 // see through such movement this leads to bad CodeGen. So we need an 3566 // intrinsic for now. 3567 Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls; 3568 Int = Type.isPoly() ? 
(unsigned)Intrinsic::arm_neon_vmullp : Int; 3569 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 3570 case NEON::BI__builtin_neon_vpadal_v: 3571 case NEON::BI__builtin_neon_vpadalq_v: { 3572 // The source operand type has twice as many elements of half the size. 3573 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3574 llvm::Type *EltTy = 3575 llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3576 llvm::Type *NarrowTy = 3577 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3578 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3579 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint); 3580 } 3581 case NEON::BI__builtin_neon_vpaddl_v: 3582 case NEON::BI__builtin_neon_vpaddlq_v: { 3583 // The source operand type has twice as many elements of half the size. 3584 unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); 3585 llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2); 3586 llvm::Type *NarrowTy = 3587 llvm::VectorType::get(EltTy, VTy->getNumElements() * 2); 3588 llvm::Type *Tys[2] = { Ty, NarrowTy }; 3589 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl"); 3590 } 3591 case NEON::BI__builtin_neon_vqdmlal_v: 3592 case NEON::BI__builtin_neon_vqdmlsl_v: { 3593 SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end()); 3594 Ops[1] = 3595 EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal"); 3596 Ops.resize(2); 3597 return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint); 3598 } 3599 case NEON::BI__builtin_neon_vqshl_n_v: 3600 case NEON::BI__builtin_neon_vqshlq_n_v: 3601 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n", 3602 1, false); 3603 case NEON::BI__builtin_neon_vqshlu_n_v: 3604 case NEON::BI__builtin_neon_vqshluq_n_v: 3605 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n", 3606 1, false); 3607 case NEON::BI__builtin_neon_vrecpe_v: 3608 case NEON::BI__builtin_neon_vrecpeq_v: 3609 case NEON::BI__builtin_neon_vrsqrte_v: 3610 case NEON::BI__builtin_neon_vrsqrteq_v: 3611 Int = Ty->isFPOrFPVectorTy() ? 
                                   LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
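      // Build one half of the vtrn result (e.g. for <4 x i32>: mask
      // <0,4,2,6> for vi==0 and <1,5,3,7> for vi==1) and store it through
      // Addr, the vi'th vector of the destination pointed to by Ops[0].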
SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 3696 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3697 } 3698 return SV; 3699 } 3700 case NEON::BI__builtin_neon_vtst_v: 3701 case NEON::BI__builtin_neon_vtstq_v: { 3702 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 3703 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3704 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 3705 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 3706 ConstantAggregateZero::get(Ty)); 3707 return Builder.CreateSExt(Ops[0], Ty, "vtst"); 3708 } 3709 case NEON::BI__builtin_neon_vuzp_v: 3710 case NEON::BI__builtin_neon_vuzpq_v: { 3711 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3712 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3713 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3714 Value *SV = nullptr; 3715 3716 for (unsigned vi = 0; vi != 2; ++vi) { 3717 SmallVector<uint32_t, 16> Indices; 3718 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 3719 Indices.push_back(2*i+vi); 3720 3721 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 3722 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 3723 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3724 } 3725 return SV; 3726 } 3727 case NEON::BI__builtin_neon_vzip_v: 3728 case NEON::BI__builtin_neon_vzipq_v: { 3729 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 3730 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 3731 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 3732 Value *SV = nullptr; 3733 3734 for (unsigned vi = 0; vi != 2; ++vi) { 3735 SmallVector<uint32_t, 16> Indices; 3736 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 3737 Indices.push_back((i + vi*e) >> 1); 3738 Indices.push_back(((i + vi*e) >> 1)+e); 3739 } 3740 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 3741 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 3742 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 3743 } 3744 return SV; 3745 } 3746 } 3747 3748 assert(Int && "Expected valid intrinsic number"); 3749 3750 // Determine the type(s) of this overloaded AArch64 intrinsic. 3751 Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E); 3752 3753 Value *Result = EmitNeonCall(F, Ops, NameHint); 3754 llvm::Type *ResultType = ConvertType(E->getType()); 3755 // AArch64 intrinsic one-element vector type cast to 3756 // scalar type expected by the builtin 3757 return Builder.CreateBitCast(Result, ResultType, NameHint); 3758 } 3759 3760 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr( 3761 Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp, 3762 const CmpInst::Predicate Ip, const Twine &Name) { 3763 llvm::Type *OTy = Op->getType(); 3764 3765 // FIXME: this is utterly horrific. We should not be looking at previous 3766 // codegen context to find out what needs doing. Unfortunately TableGen 3767 // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32 3768 // (etc). 
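  // If Op was bitcast from another vector type, undo that cast so the
  // comparison is emitted on the original (possibly floating-point) type.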
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
    OTy = BI->getOperand(0)->getType();

  Op = Builder.CreateBitCast(Op, OTy);
  if (OTy->getScalarType()->isFloatingPointTy()) {
    Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
  } else {
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  }
  return Builder.CreateSExt(Op, Ty, Name);
}

static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<uint32_t, 16> Indices;
  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(2*i);
    Indices.push_back(2*i+1);
  }

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], Indices,
                                                     Name));
    PairPos += 2;
  }

  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
  // of the last 128-bit lookup table with zero.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, Indices, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}

Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  unsigned Value;
  switch (BuiltinID) {
  default:
    return nullptr;
  case ARM::BI__builtin_arm_nop:
    Value = 0;
    break;
  case ARM::BI__builtin_arm_yield:
  case ARM::BI__yield:
    Value = 1;
    break;
  case ARM::BI__builtin_arm_wfe:
  case ARM::BI__wfe:
    Value = 2;
    break;
  case ARM::BI__builtin_arm_wfi:
  case ARM::BI__wfi:
    Value = 3;
    break;
  case ARM::BI__builtin_arm_sev:
  case ARM::BI__sev:
    Value = 4;
    break;
  case ARM::BI__builtin_arm_sevl:
  case ARM::BI__sevl:
    Value = 5;
    break;
  }

  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                            llvm::ConstantInt::get(Int32Ty, Value));
}

// Generates the IR for the read/write special register builtin.
// ValueType is the type of the value that is to be written or read;
// RegisterType is the type of the register being written to or read from.
static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
                                         const CallExpr *E,
                                         llvm::Type *RegisterType,
                                         llvm::Type *ValueType,
                                         bool IsRead,
                                         StringRef SysReg = "") {
  // The read/write register intrinsics only support 32- and 64-bit operations.
3864 assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64)) 3865 && "Unsupported size for register."); 3866 3867 CodeGen::CGBuilderTy &Builder = CGF.Builder; 3868 CodeGen::CodeGenModule &CGM = CGF.CGM; 3869 LLVMContext &Context = CGM.getLLVMContext(); 3870 3871 if (SysReg.empty()) { 3872 const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts(); 3873 SysReg = cast<StringLiteral>(SysRegStrExpr)->getString(); 3874 } 3875 3876 llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) }; 3877 llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops); 3878 llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName); 3879 3880 llvm::Type *Types[] = { RegisterType }; 3881 3882 bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32); 3883 assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64)) 3884 && "Can't fit 64-bit value in 32-bit register"); 3885 3886 if (IsRead) { 3887 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types); 3888 llvm::Value *Call = Builder.CreateCall(F, Metadata); 3889 3890 if (MixedTypes) 3891 // Read into 64 bit register and then truncate result to 32 bit. 3892 return Builder.CreateTrunc(Call, ValueType); 3893 3894 if (ValueType->isPointerTy()) 3895 // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*). 3896 return Builder.CreateIntToPtr(Call, ValueType); 3897 3898 return Call; 3899 } 3900 3901 llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types); 3902 llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1)); 3903 if (MixedTypes) { 3904 // Extend 32 bit write value to 64 bit to pass to write. 3905 ArgValue = Builder.CreateZExt(ArgValue, RegisterType); 3906 return Builder.CreateCall(F, { Metadata, ArgValue }); 3907 } 3908 3909 if (ValueType->isPointerTy()) { 3910 // Have VoidPtrTy ArgValue but want to return an i32/i64. 3911 ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType); 3912 return Builder.CreateCall(F, { Metadata, ArgValue }); 3913 } 3914 3915 return Builder.CreateCall(F, { Metadata, ArgValue }); 3916 } 3917 3918 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra 3919 /// argument that specifies the vector type. 
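/// The lane get/set builtins, the SHA-1 scalar builtins and the
/// _MoveToCoprocessor builtins take fully-specified types, so they are the
/// exceptions listed in the switch below.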
3920 static bool HasExtraNeonArgument(unsigned BuiltinID) { 3921 switch (BuiltinID) { 3922 default: break; 3923 case NEON::BI__builtin_neon_vget_lane_i8: 3924 case NEON::BI__builtin_neon_vget_lane_i16: 3925 case NEON::BI__builtin_neon_vget_lane_i32: 3926 case NEON::BI__builtin_neon_vget_lane_i64: 3927 case NEON::BI__builtin_neon_vget_lane_f32: 3928 case NEON::BI__builtin_neon_vgetq_lane_i8: 3929 case NEON::BI__builtin_neon_vgetq_lane_i16: 3930 case NEON::BI__builtin_neon_vgetq_lane_i32: 3931 case NEON::BI__builtin_neon_vgetq_lane_i64: 3932 case NEON::BI__builtin_neon_vgetq_lane_f32: 3933 case NEON::BI__builtin_neon_vset_lane_i8: 3934 case NEON::BI__builtin_neon_vset_lane_i16: 3935 case NEON::BI__builtin_neon_vset_lane_i32: 3936 case NEON::BI__builtin_neon_vset_lane_i64: 3937 case NEON::BI__builtin_neon_vset_lane_f32: 3938 case NEON::BI__builtin_neon_vsetq_lane_i8: 3939 case NEON::BI__builtin_neon_vsetq_lane_i16: 3940 case NEON::BI__builtin_neon_vsetq_lane_i32: 3941 case NEON::BI__builtin_neon_vsetq_lane_i64: 3942 case NEON::BI__builtin_neon_vsetq_lane_f32: 3943 case NEON::BI__builtin_neon_vsha1h_u32: 3944 case NEON::BI__builtin_neon_vsha1cq_u32: 3945 case NEON::BI__builtin_neon_vsha1pq_u32: 3946 case NEON::BI__builtin_neon_vsha1mq_u32: 3947 case ARM::BI_MoveToCoprocessor: 3948 case ARM::BI_MoveToCoprocessor2: 3949 return false; 3950 } 3951 return true; 3952 } 3953 3954 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, 3955 const CallExpr *E) { 3956 if (auto Hint = GetValueForARMHint(BuiltinID)) 3957 return Hint; 3958 3959 if (BuiltinID == ARM::BI__emit) { 3960 bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb; 3961 llvm::FunctionType *FTy = 3962 llvm::FunctionType::get(VoidTy, /*Variadic=*/false); 3963 3964 APSInt Value; 3965 if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext())) 3966 llvm_unreachable("Sema will ensure that the parameter is constant"); 3967 3968 uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue(); 3969 3970 llvm::InlineAsm *Emit = 3971 IsThumb ? 
InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "", 3972 /*SideEffects=*/true) 3973 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "", 3974 /*SideEffects=*/true); 3975 3976 return Builder.CreateCall(Emit); 3977 } 3978 3979 if (BuiltinID == ARM::BI__builtin_arm_dbg) { 3980 Value *Option = EmitScalarExpr(E->getArg(0)); 3981 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option); 3982 } 3983 3984 if (BuiltinID == ARM::BI__builtin_arm_prefetch) { 3985 Value *Address = EmitScalarExpr(E->getArg(0)); 3986 Value *RW = EmitScalarExpr(E->getArg(1)); 3987 Value *IsData = EmitScalarExpr(E->getArg(2)); 3988 3989 // Locality is not supported on ARM target 3990 Value *Locality = llvm::ConstantInt::get(Int32Ty, 3); 3991 3992 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 3993 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 3994 } 3995 3996 if (BuiltinID == ARM::BI__builtin_arm_rbit) { 3997 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit), 3998 EmitScalarExpr(E->getArg(0)), 3999 "rbit"); 4000 } 4001 4002 if (BuiltinID == ARM::BI__clear_cache) { 4003 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4004 const FunctionDecl *FD = E->getDirectCallee(); 4005 Value *Ops[2]; 4006 for (unsigned i = 0; i < 2; i++) 4007 Ops[i] = EmitScalarExpr(E->getArg(i)); 4008 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4009 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4010 StringRef Name = FD->getName(); 4011 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4012 } 4013 4014 if (BuiltinID == ARM::BI__builtin_arm_mcrr || 4015 BuiltinID == ARM::BI__builtin_arm_mcrr2) { 4016 Function *F; 4017 4018 switch (BuiltinID) { 4019 default: llvm_unreachable("unexpected builtin"); 4020 case ARM::BI__builtin_arm_mcrr: 4021 F = CGM.getIntrinsic(Intrinsic::arm_mcrr); 4022 break; 4023 case ARM::BI__builtin_arm_mcrr2: 4024 F = CGM.getIntrinsic(Intrinsic::arm_mcrr2); 4025 break; 4026 } 4027 4028 // MCRR{2} instruction has 5 operands but 4029 // the intrinsic has 4 because Rt and Rt2 4030 // are represented as a single unsigned 64 4031 // bit integer in the intrinsic definition 4032 // but internally it's represented as 2 32 4033 // bit integers. 4034 4035 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4036 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4037 Value *RtAndRt2 = EmitScalarExpr(E->getArg(2)); 4038 Value *CRm = EmitScalarExpr(E->getArg(3)); 4039 4040 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4041 Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty); 4042 Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1); 4043 Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty); 4044 4045 return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm}); 4046 } 4047 4048 if (BuiltinID == ARM::BI__builtin_arm_mrrc || 4049 BuiltinID == ARM::BI__builtin_arm_mrrc2) { 4050 Function *F; 4051 4052 switch (BuiltinID) { 4053 default: llvm_unreachable("unexpected builtin"); 4054 case ARM::BI__builtin_arm_mrrc: 4055 F = CGM.getIntrinsic(Intrinsic::arm_mrrc); 4056 break; 4057 case ARM::BI__builtin_arm_mrrc2: 4058 F = CGM.getIntrinsic(Intrinsic::arm_mrrc2); 4059 break; 4060 } 4061 4062 Value *Coproc = EmitScalarExpr(E->getArg(0)); 4063 Value *Opc1 = EmitScalarExpr(E->getArg(1)); 4064 Value *CRm = EmitScalarExpr(E->getArg(2)); 4065 Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm}); 4066 4067 // Returns an unsigned 64 bit integer, represented 4068 // as two 32 bit integers. 
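    // The two halves are extracted below and reassembled as
    // (high << 32) | low before being bitcast to the builtin's return type.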
4069 4070 Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1); 4071 Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0); 4072 Rt = Builder.CreateZExt(Rt, Int64Ty); 4073 Rt1 = Builder.CreateZExt(Rt1, Int64Ty); 4074 4075 Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32); 4076 RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true); 4077 RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1); 4078 4079 return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType())); 4080 } 4081 4082 if (BuiltinID == ARM::BI__builtin_arm_ldrexd || 4083 ((BuiltinID == ARM::BI__builtin_arm_ldrex || 4084 BuiltinID == ARM::BI__builtin_arm_ldaex) && 4085 getContext().getTypeSize(E->getType()) == 64) || 4086 BuiltinID == ARM::BI__ldrexd) { 4087 Function *F; 4088 4089 switch (BuiltinID) { 4090 default: llvm_unreachable("unexpected builtin"); 4091 case ARM::BI__builtin_arm_ldaex: 4092 F = CGM.getIntrinsic(Intrinsic::arm_ldaexd); 4093 break; 4094 case ARM::BI__builtin_arm_ldrexd: 4095 case ARM::BI__builtin_arm_ldrex: 4096 case ARM::BI__ldrexd: 4097 F = CGM.getIntrinsic(Intrinsic::arm_ldrexd); 4098 break; 4099 } 4100 4101 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4102 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4103 "ldrexd"); 4104 4105 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4106 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4107 Val0 = Builder.CreateZExt(Val0, Int64Ty); 4108 Val1 = Builder.CreateZExt(Val1, Int64Ty); 4109 4110 Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32); 4111 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4112 Val = Builder.CreateOr(Val, Val1); 4113 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4114 } 4115 4116 if (BuiltinID == ARM::BI__builtin_arm_ldrex || 4117 BuiltinID == ARM::BI__builtin_arm_ldaex) { 4118 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4119 4120 QualType Ty = E->getType(); 4121 llvm::Type *RealResTy = ConvertType(Ty); 4122 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 4123 getContext().getTypeSize(Ty)); 4124 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 4125 4126 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex 4127 ? Intrinsic::arm_ldaex 4128 : Intrinsic::arm_ldrex, 4129 LoadAddr->getType()); 4130 Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex"); 4131 4132 if (RealResTy->isPointerTy()) 4133 return Builder.CreateIntToPtr(Val, RealResTy); 4134 else { 4135 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4136 return Builder.CreateBitCast(Val, RealResTy); 4137 } 4138 } 4139 4140 if (BuiltinID == ARM::BI__builtin_arm_strexd || 4141 ((BuiltinID == ARM::BI__builtin_arm_stlex || 4142 BuiltinID == ARM::BI__builtin_arm_strex) && 4143 getContext().getTypeSize(E->getArg(0)->getType()) == 64)) { 4144 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex 4145 ? 
Intrinsic::arm_stlexd
4146 : Intrinsic::arm_strexd);
4147 llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4148
4149 Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4150 Value *Val = EmitScalarExpr(E->getArg(0));
4151 Builder.CreateStore(Val, Tmp);
4152
4153 Address LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4154 Val = Builder.CreateLoad(LdPtr);
4155
4156 Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4157 Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4158 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4159 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4160 }
4161
4162 if (BuiltinID == ARM::BI__builtin_arm_strex ||
4163 BuiltinID == ARM::BI__builtin_arm_stlex) {
4164 Value *StoreVal = EmitScalarExpr(E->getArg(0));
4165 Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4166
4167 QualType Ty = E->getArg(0)->getType();
4168 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4169 getContext().getTypeSize(Ty));
4170 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4171
4172 if (StoreVal->getType()->isPointerTy())
4173 StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4174 else {
4175 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4176 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4177 }
4178
4179 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4180 ? Intrinsic::arm_stlex
4181 : Intrinsic::arm_strex,
4182 StoreAddr->getType());
4183 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4184 }
4185
4186 if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4187 Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4188 return Builder.CreateCall(F);
4189 }
4190
4191 // CRC32
4192 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4193 switch (BuiltinID) {
4194 case ARM::BI__builtin_arm_crc32b:
4195 CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4196 case ARM::BI__builtin_arm_crc32cb:
4197 CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4198 case ARM::BI__builtin_arm_crc32h:
4199 CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4200 case ARM::BI__builtin_arm_crc32ch:
4201 CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4202 case ARM::BI__builtin_arm_crc32w:
4203 case ARM::BI__builtin_arm_crc32d:
4204 CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4205 case ARM::BI__builtin_arm_crc32cw:
4206 case ARM::BI__builtin_arm_crc32cd:
4207 CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4208 }
4209
4210 if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4211 Value *Arg0 = EmitScalarExpr(E->getArg(0));
4212 Value *Arg1 = EmitScalarExpr(E->getArg(1));
4213
4214 // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4215 // intrinsics, hence we need different codegen for these cases.
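// Put differently (illustrative identity only, matching the code below;
// lo()/hi() stand for the low/high 32 bits of the 64-bit data argument):
//   __builtin_arm_crc32d(crc, x)  == crc32w(crc32w(crc, lo(x)), hi(x))
//   __builtin_arm_crc32cd(crc, x) == crc32cw(crc32cw(crc, lo(x)), hi(x))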
4216 if (BuiltinID == ARM::BI__builtin_arm_crc32d || 4217 BuiltinID == ARM::BI__builtin_arm_crc32cd) { 4218 Value *C1 = llvm::ConstantInt::get(Int64Ty, 32); 4219 Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty); 4220 Value *Arg1b = Builder.CreateLShr(Arg1, C1); 4221 Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty); 4222 4223 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4224 Value *Res = Builder.CreateCall(F, {Arg0, Arg1a}); 4225 return Builder.CreateCall(F, {Res, Arg1b}); 4226 } else { 4227 Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty); 4228 4229 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4230 return Builder.CreateCall(F, {Arg0, Arg1}); 4231 } 4232 } 4233 4234 if (BuiltinID == ARM::BI__builtin_arm_rsr || 4235 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4236 BuiltinID == ARM::BI__builtin_arm_rsrp || 4237 BuiltinID == ARM::BI__builtin_arm_wsr || 4238 BuiltinID == ARM::BI__builtin_arm_wsr64 || 4239 BuiltinID == ARM::BI__builtin_arm_wsrp) { 4240 4241 bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr || 4242 BuiltinID == ARM::BI__builtin_arm_rsr64 || 4243 BuiltinID == ARM::BI__builtin_arm_rsrp; 4244 4245 bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp || 4246 BuiltinID == ARM::BI__builtin_arm_wsrp; 4247 4248 bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 || 4249 BuiltinID == ARM::BI__builtin_arm_wsr64; 4250 4251 llvm::Type *ValueType; 4252 llvm::Type *RegisterType; 4253 if (IsPointerBuiltin) { 4254 ValueType = VoidPtrTy; 4255 RegisterType = Int32Ty; 4256 } else if (Is64Bit) { 4257 ValueType = RegisterType = Int64Ty; 4258 } else { 4259 ValueType = RegisterType = Int32Ty; 4260 } 4261 4262 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 4263 } 4264 4265 // Find out if any arguments are required to be integer constant 4266 // expressions. 4267 unsigned ICEArguments = 0; 4268 ASTContext::GetBuiltinTypeError Error; 4269 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 4270 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 4271 4272 auto getAlignmentValue32 = [&](Address addr) -> Value* { 4273 return Builder.getInt32(addr.getAlignment().getQuantity()); 4274 }; 4275 4276 Address PtrOp0 = Address::invalid(); 4277 Address PtrOp1 = Address::invalid(); 4278 SmallVector<Value*, 4> Ops; 4279 bool HasExtraArg = HasExtraNeonArgument(BuiltinID); 4280 unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 
1 : 0); 4281 for (unsigned i = 0, e = NumArgs; i != e; i++) { 4282 if (i == 0) { 4283 switch (BuiltinID) { 4284 case NEON::BI__builtin_neon_vld1_v: 4285 case NEON::BI__builtin_neon_vld1q_v: 4286 case NEON::BI__builtin_neon_vld1q_lane_v: 4287 case NEON::BI__builtin_neon_vld1_lane_v: 4288 case NEON::BI__builtin_neon_vld1_dup_v: 4289 case NEON::BI__builtin_neon_vld1q_dup_v: 4290 case NEON::BI__builtin_neon_vst1_v: 4291 case NEON::BI__builtin_neon_vst1q_v: 4292 case NEON::BI__builtin_neon_vst1q_lane_v: 4293 case NEON::BI__builtin_neon_vst1_lane_v: 4294 case NEON::BI__builtin_neon_vst2_v: 4295 case NEON::BI__builtin_neon_vst2q_v: 4296 case NEON::BI__builtin_neon_vst2_lane_v: 4297 case NEON::BI__builtin_neon_vst2q_lane_v: 4298 case NEON::BI__builtin_neon_vst3_v: 4299 case NEON::BI__builtin_neon_vst3q_v: 4300 case NEON::BI__builtin_neon_vst3_lane_v: 4301 case NEON::BI__builtin_neon_vst3q_lane_v: 4302 case NEON::BI__builtin_neon_vst4_v: 4303 case NEON::BI__builtin_neon_vst4q_v: 4304 case NEON::BI__builtin_neon_vst4_lane_v: 4305 case NEON::BI__builtin_neon_vst4q_lane_v: 4306 // Get the alignment for the argument in addition to the value; 4307 // we'll use it later. 4308 PtrOp0 = EmitPointerWithAlignment(E->getArg(0)); 4309 Ops.push_back(PtrOp0.getPointer()); 4310 continue; 4311 } 4312 } 4313 if (i == 1) { 4314 switch (BuiltinID) { 4315 case NEON::BI__builtin_neon_vld2_v: 4316 case NEON::BI__builtin_neon_vld2q_v: 4317 case NEON::BI__builtin_neon_vld3_v: 4318 case NEON::BI__builtin_neon_vld3q_v: 4319 case NEON::BI__builtin_neon_vld4_v: 4320 case NEON::BI__builtin_neon_vld4q_v: 4321 case NEON::BI__builtin_neon_vld2_lane_v: 4322 case NEON::BI__builtin_neon_vld2q_lane_v: 4323 case NEON::BI__builtin_neon_vld3_lane_v: 4324 case NEON::BI__builtin_neon_vld3q_lane_v: 4325 case NEON::BI__builtin_neon_vld4_lane_v: 4326 case NEON::BI__builtin_neon_vld4q_lane_v: 4327 case NEON::BI__builtin_neon_vld2_dup_v: 4328 case NEON::BI__builtin_neon_vld3_dup_v: 4329 case NEON::BI__builtin_neon_vld4_dup_v: 4330 // Get the alignment for the argument in addition to the value; 4331 // we'll use it later. 4332 PtrOp1 = EmitPointerWithAlignment(E->getArg(1)); 4333 Ops.push_back(PtrOp1.getPointer()); 4334 continue; 4335 } 4336 } 4337 4338 if ((ICEArguments & (1 << i)) == 0) { 4339 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4340 } else { 4341 // If this is required to be a constant, constant fold it so that we know 4342 // that the generated intrinsic gets a ConstantInt. 
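// Note: bit i of ICEArguments (filled in by GetBuiltinType above) marks
// argument i as an integer constant expression -- typically lane numbers and
// shift amounts -- which must reach the intrinsic as a ConstantInt.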
4343 llvm::APSInt Result; 4344 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 4345 assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; 4346 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 4347 } 4348 } 4349 4350 switch (BuiltinID) { 4351 default: break; 4352 4353 case NEON::BI__builtin_neon_vget_lane_i8: 4354 case NEON::BI__builtin_neon_vget_lane_i16: 4355 case NEON::BI__builtin_neon_vget_lane_i32: 4356 case NEON::BI__builtin_neon_vget_lane_i64: 4357 case NEON::BI__builtin_neon_vget_lane_f32: 4358 case NEON::BI__builtin_neon_vgetq_lane_i8: 4359 case NEON::BI__builtin_neon_vgetq_lane_i16: 4360 case NEON::BI__builtin_neon_vgetq_lane_i32: 4361 case NEON::BI__builtin_neon_vgetq_lane_i64: 4362 case NEON::BI__builtin_neon_vgetq_lane_f32: 4363 return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane"); 4364 4365 case NEON::BI__builtin_neon_vset_lane_i8: 4366 case NEON::BI__builtin_neon_vset_lane_i16: 4367 case NEON::BI__builtin_neon_vset_lane_i32: 4368 case NEON::BI__builtin_neon_vset_lane_i64: 4369 case NEON::BI__builtin_neon_vset_lane_f32: 4370 case NEON::BI__builtin_neon_vsetq_lane_i8: 4371 case NEON::BI__builtin_neon_vsetq_lane_i16: 4372 case NEON::BI__builtin_neon_vsetq_lane_i32: 4373 case NEON::BI__builtin_neon_vsetq_lane_i64: 4374 case NEON::BI__builtin_neon_vsetq_lane_f32: 4375 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4376 4377 case NEON::BI__builtin_neon_vsha1h_u32: 4378 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops, 4379 "vsha1h"); 4380 case NEON::BI__builtin_neon_vsha1cq_u32: 4381 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops, 4382 "vsha1h"); 4383 case NEON::BI__builtin_neon_vsha1pq_u32: 4384 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops, 4385 "vsha1h"); 4386 case NEON::BI__builtin_neon_vsha1mq_u32: 4387 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops, 4388 "vsha1h"); 4389 4390 // The ARM _MoveToCoprocessor builtins put the input register value as 4391 // the first argument, but the LLVM intrinsic expects it as the third one. 4392 case ARM::BI_MoveToCoprocessor: 4393 case ARM::BI_MoveToCoprocessor2: { 4394 Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ? 4395 Intrinsic::arm_mcr : Intrinsic::arm_mcr2); 4396 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0], 4397 Ops[3], Ops[4], Ops[5]}); 4398 } 4399 } 4400 4401 // Get the last argument, which specifies the vector type. 4402 assert(HasExtraArg); 4403 llvm::APSInt Result; 4404 const Expr *Arg = E->getArg(E->getNumArgs()-1); 4405 if (!Arg->isIntegerConstantExpr(Result, getContext())) 4406 return nullptr; 4407 4408 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || 4409 BuiltinID == ARM::BI__builtin_arm_vcvtr_d) { 4410 // Determine the overloaded type of this builtin. 4411 llvm::Type *Ty; 4412 if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f) 4413 Ty = FloatTy; 4414 else 4415 Ty = DoubleTy; 4416 4417 // Determine whether this is an unsigned conversion or not. 4418 bool usgn = Result.getZExtValue() == 1; 4419 unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; 4420 4421 // Call the appropriate intrinsic. 4422 Function *F = CGM.getIntrinsic(Int, Ty); 4423 return Builder.CreateCall(F, Ops, "vcvtr"); 4424 } 4425 4426 // Determine the type of this overloaded NEON intrinsic. 
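// The trailing argument of the overloaded "_v" NEON builtins is a
// NeonTypeFlags constant encoding the element type together with the
// unsigned and quad flags; the code below decodes it to choose the LLVM
// vector type and the signed/unsigned intrinsic variants.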
4427 NeonTypeFlags Type(Result.getZExtValue()); 4428 bool usgn = Type.isUnsigned(); 4429 bool rightShift = false; 4430 4431 llvm::VectorType *VTy = GetNeonType(this, Type); 4432 llvm::Type *Ty = VTy; 4433 if (!Ty) 4434 return nullptr; 4435 4436 // Many NEON builtins have identical semantics and uses in ARM and 4437 // AArch64. Emit these in a single function. 4438 auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap); 4439 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4440 IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted); 4441 if (Builtin) 4442 return EmitCommonNeonBuiltinExpr( 4443 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4444 Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1); 4445 4446 unsigned Int; 4447 switch (BuiltinID) { 4448 default: return nullptr; 4449 case NEON::BI__builtin_neon_vld1q_lane_v: 4450 // Handle 64-bit integer elements as a special case. Use shuffles of 4451 // one-element vectors to avoid poor code for i64 in the backend. 4452 if (VTy->getElementType()->isIntegerTy(64)) { 4453 // Extract the other lane. 4454 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4455 uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue(); 4456 Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane)); 4457 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4458 // Load the value as a one-element vector. 4459 Ty = llvm::VectorType::get(VTy->getElementType(), 1); 4460 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4461 Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys); 4462 Value *Align = getAlignmentValue32(PtrOp0); 4463 Value *Ld = Builder.CreateCall(F, {Ops[0], Align}); 4464 // Combine them. 4465 uint32_t Indices[] = {1 - Lane, Lane}; 4466 SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices); 4467 return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane"); 4468 } 4469 // fall through 4470 case NEON::BI__builtin_neon_vld1_lane_v: { 4471 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4472 PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType()); 4473 Value *Ld = Builder.CreateLoad(PtrOp0); 4474 return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); 4475 } 4476 case NEON::BI__builtin_neon_vld2_dup_v: 4477 case NEON::BI__builtin_neon_vld3_dup_v: 4478 case NEON::BI__builtin_neon_vld4_dup_v: { 4479 // Handle 64-bit elements as a special-case. There is no "dup" needed. 
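// Rationale: with 64-bit elements each D register holds exactly one element,
// so "load and duplicate lane 0" degenerates into a plain vldN. The general
// path further below instead loads a single lane into undef vectors via
// vldNlane and then splats lane 0 across each result vector.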
4480 if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) { 4481 switch (BuiltinID) { 4482 case NEON::BI__builtin_neon_vld2_dup_v: 4483 Int = Intrinsic::arm_neon_vld2; 4484 break; 4485 case NEON::BI__builtin_neon_vld3_dup_v: 4486 Int = Intrinsic::arm_neon_vld3; 4487 break; 4488 case NEON::BI__builtin_neon_vld4_dup_v: 4489 Int = Intrinsic::arm_neon_vld4; 4490 break; 4491 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4492 } 4493 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4494 Function *F = CGM.getIntrinsic(Int, Tys); 4495 llvm::Value *Align = getAlignmentValue32(PtrOp1); 4496 Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup"); 4497 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4498 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4499 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4500 } 4501 switch (BuiltinID) { 4502 case NEON::BI__builtin_neon_vld2_dup_v: 4503 Int = Intrinsic::arm_neon_vld2lane; 4504 break; 4505 case NEON::BI__builtin_neon_vld3_dup_v: 4506 Int = Intrinsic::arm_neon_vld3lane; 4507 break; 4508 case NEON::BI__builtin_neon_vld4_dup_v: 4509 Int = Intrinsic::arm_neon_vld4lane; 4510 break; 4511 default: llvm_unreachable("unknown vld_dup intrinsic?"); 4512 } 4513 llvm::Type *Tys[] = {Ty, Int8PtrTy}; 4514 Function *F = CGM.getIntrinsic(Int, Tys); 4515 llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); 4516 4517 SmallVector<Value*, 6> Args; 4518 Args.push_back(Ops[1]); 4519 Args.append(STy->getNumElements(), UndefValue::get(Ty)); 4520 4521 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 4522 Args.push_back(CI); 4523 Args.push_back(getAlignmentValue32(PtrOp1)); 4524 4525 Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); 4526 // splat lane 0 to all elts in each vector of the result. 4527 for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 4528 Value *Val = Builder.CreateExtractValue(Ops[1], i); 4529 Value *Elt = Builder.CreateBitCast(Val, Ty); 4530 Elt = EmitNeonSplat(Elt, CI); 4531 Elt = Builder.CreateBitCast(Elt, Val->getType()); 4532 Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); 4533 } 4534 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4535 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4536 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 4537 } 4538 case NEON::BI__builtin_neon_vqrshrn_n_v: 4539 Int = 4540 usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns; 4541 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n", 4542 1, true); 4543 case NEON::BI__builtin_neon_vqrshrun_n_v: 4544 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty), 4545 Ops, "vqrshrun_n", 1, true); 4546 case NEON::BI__builtin_neon_vqshrn_n_v: 4547 Int = usgn ? 
Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4548 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4549 1, true);
4550 case NEON::BI__builtin_neon_vqshrun_n_v:
4551 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4552 Ops, "vqshrun_n", 1, true);
4553 case NEON::BI__builtin_neon_vrecpe_v:
4554 case NEON::BI__builtin_neon_vrecpeq_v:
4555 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4556 Ops, "vrecpe");
4557 case NEON::BI__builtin_neon_vrshrn_n_v:
4558 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4559 Ops, "vrshrn_n", 1, true);
4560 case NEON::BI__builtin_neon_vrsra_n_v:
4561 case NEON::BI__builtin_neon_vrsraq_n_v:
4562 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4563 Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4564 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4565 Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4566 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4567 return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4568 case NEON::BI__builtin_neon_vsri_n_v:
4569 case NEON::BI__builtin_neon_vsriq_n_v:
4570 rightShift = true;
// fall through
4571 case NEON::BI__builtin_neon_vsli_n_v:
4572 case NEON::BI__builtin_neon_vsliq_n_v:
4573 Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4574 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4575 Ops, "vsli_n");
4576 case NEON::BI__builtin_neon_vsra_n_v:
4577 case NEON::BI__builtin_neon_vsraq_n_v:
4578 Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4579 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4580 return Builder.CreateAdd(Ops[0], Ops[1]);
4581 case NEON::BI__builtin_neon_vst1q_lane_v:
4582 // Handle 64-bit integer elements as a special case. Use a shuffle to get
4583 // a one-element vector and avoid poor code for i64 in the backend.
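// Sketch of the special case below (illustrative only): for
// vst1q_lane_s64(p, v, Lane) the requested lane is shuffled into a
// one-element <1 x i64> vector, which is then stored through
// llvm.arm.neon.vst1 with the pointer's alignment, rather than being
// extracted to an i64 scalar and stored directly.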
4584 if (VTy->getElementType()->isIntegerTy(64)) { 4585 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4586 Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2])); 4587 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV); 4588 Ops[2] = getAlignmentValue32(PtrOp0); 4589 llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()}; 4590 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, 4591 Tys), Ops); 4592 } 4593 // fall through 4594 case NEON::BI__builtin_neon_vst1_lane_v: { 4595 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4596 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 4597 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 4598 auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty)); 4599 return St; 4600 } 4601 case NEON::BI__builtin_neon_vtbl1_v: 4602 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1), 4603 Ops, "vtbl1"); 4604 case NEON::BI__builtin_neon_vtbl2_v: 4605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2), 4606 Ops, "vtbl2"); 4607 case NEON::BI__builtin_neon_vtbl3_v: 4608 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3), 4609 Ops, "vtbl3"); 4610 case NEON::BI__builtin_neon_vtbl4_v: 4611 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4), 4612 Ops, "vtbl4"); 4613 case NEON::BI__builtin_neon_vtbx1_v: 4614 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1), 4615 Ops, "vtbx1"); 4616 case NEON::BI__builtin_neon_vtbx2_v: 4617 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2), 4618 Ops, "vtbx2"); 4619 case NEON::BI__builtin_neon_vtbx3_v: 4620 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3), 4621 Ops, "vtbx3"); 4622 case NEON::BI__builtin_neon_vtbx4_v: 4623 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4), 4624 Ops, "vtbx4"); 4625 } 4626 } 4627 4628 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, 4629 const CallExpr *E, 4630 SmallVectorImpl<Value *> &Ops) { 4631 unsigned int Int = 0; 4632 const char *s = nullptr; 4633 4634 switch (BuiltinID) { 4635 default: 4636 return nullptr; 4637 case NEON::BI__builtin_neon_vtbl1_v: 4638 case NEON::BI__builtin_neon_vqtbl1_v: 4639 case NEON::BI__builtin_neon_vqtbl1q_v: 4640 case NEON::BI__builtin_neon_vtbl2_v: 4641 case NEON::BI__builtin_neon_vqtbl2_v: 4642 case NEON::BI__builtin_neon_vqtbl2q_v: 4643 case NEON::BI__builtin_neon_vtbl3_v: 4644 case NEON::BI__builtin_neon_vqtbl3_v: 4645 case NEON::BI__builtin_neon_vqtbl3q_v: 4646 case NEON::BI__builtin_neon_vtbl4_v: 4647 case NEON::BI__builtin_neon_vqtbl4_v: 4648 case NEON::BI__builtin_neon_vqtbl4q_v: 4649 break; 4650 case NEON::BI__builtin_neon_vtbx1_v: 4651 case NEON::BI__builtin_neon_vqtbx1_v: 4652 case NEON::BI__builtin_neon_vqtbx1q_v: 4653 case NEON::BI__builtin_neon_vtbx2_v: 4654 case NEON::BI__builtin_neon_vqtbx2_v: 4655 case NEON::BI__builtin_neon_vqtbx2q_v: 4656 case NEON::BI__builtin_neon_vtbx3_v: 4657 case NEON::BI__builtin_neon_vqtbx3_v: 4658 case NEON::BI__builtin_neon_vqtbx3q_v: 4659 case NEON::BI__builtin_neon_vtbx4_v: 4660 case NEON::BI__builtin_neon_vqtbx4_v: 4661 case NEON::BI__builtin_neon_vqtbx4q_v: 4662 break; 4663 } 4664 4665 assert(E->getNumArgs() >= 3); 4666 4667 // Get the last argument, which specifies the vector type. 4668 llvm::APSInt Result; 4669 const Expr *Arg = E->getArg(E->getNumArgs() - 1); 4670 if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) 4671 return nullptr; 4672 4673 // Determine the type of this overloaded NEON intrinsic. 
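// The same NeonTypeFlags decoding as in the ARM path applies here. Note also
// that the vtbx1/vtbx3 cases further below are emitted as a TBL followed by
// an explicit compare-and-select, so that elements whose index is out of
// range for the original builtin (>= 8 with one table register, >= 24 with
// three) are taken from the destination operand, as vtbx semantics require.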
4674 NeonTypeFlags Type(Result.getZExtValue()); 4675 llvm::VectorType *Ty = GetNeonType(&CGF, Type); 4676 if (!Ty) 4677 return nullptr; 4678 4679 CodeGen::CGBuilderTy &Builder = CGF.Builder; 4680 4681 // AArch64 scalar builtins are not overloaded, they do not have an extra 4682 // argument that specifies the vector type, need to handle each case. 4683 switch (BuiltinID) { 4684 case NEON::BI__builtin_neon_vtbl1_v: { 4685 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr, 4686 Ops[1], Ty, Intrinsic::aarch64_neon_tbl1, 4687 "vtbl1"); 4688 } 4689 case NEON::BI__builtin_neon_vtbl2_v: { 4690 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr, 4691 Ops[2], Ty, Intrinsic::aarch64_neon_tbl1, 4692 "vtbl1"); 4693 } 4694 case NEON::BI__builtin_neon_vtbl3_v: { 4695 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr, 4696 Ops[3], Ty, Intrinsic::aarch64_neon_tbl2, 4697 "vtbl2"); 4698 } 4699 case NEON::BI__builtin_neon_vtbl4_v: { 4700 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr, 4701 Ops[4], Ty, Intrinsic::aarch64_neon_tbl2, 4702 "vtbl2"); 4703 } 4704 case NEON::BI__builtin_neon_vtbx1_v: { 4705 Value *TblRes = 4706 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2], 4707 Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1"); 4708 4709 llvm::Constant *EightV = ConstantInt::get(Ty, 8); 4710 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV); 4711 CmpRes = Builder.CreateSExt(CmpRes, Ty); 4712 4713 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 4714 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 4715 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 4716 } 4717 case NEON::BI__builtin_neon_vtbx2_v: { 4718 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0], 4719 Ops[3], Ty, Intrinsic::aarch64_neon_tbx1, 4720 "vtbx1"); 4721 } 4722 case NEON::BI__builtin_neon_vtbx3_v: { 4723 Value *TblRes = 4724 packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4], 4725 Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2"); 4726 4727 llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24); 4728 Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4], 4729 TwentyFourV); 4730 CmpRes = Builder.CreateSExt(CmpRes, Ty); 4731 4732 Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]); 4733 Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes); 4734 return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx"); 4735 } 4736 case NEON::BI__builtin_neon_vtbx4_v: { 4737 return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0], 4738 Ops[5], Ty, Intrinsic::aarch64_neon_tbx2, 4739 "vtbx2"); 4740 } 4741 case NEON::BI__builtin_neon_vqtbl1_v: 4742 case NEON::BI__builtin_neon_vqtbl1q_v: 4743 Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break; 4744 case NEON::BI__builtin_neon_vqtbl2_v: 4745 case NEON::BI__builtin_neon_vqtbl2q_v: { 4746 Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break; 4747 case NEON::BI__builtin_neon_vqtbl3_v: 4748 case NEON::BI__builtin_neon_vqtbl3q_v: 4749 Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break; 4750 case NEON::BI__builtin_neon_vqtbl4_v: 4751 case NEON::BI__builtin_neon_vqtbl4q_v: 4752 Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break; 4753 case NEON::BI__builtin_neon_vqtbx1_v: 4754 case NEON::BI__builtin_neon_vqtbx1q_v: 4755 Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break; 4756 case NEON::BI__builtin_neon_vqtbx2_v: 4757 case NEON::BI__builtin_neon_vqtbx2q_v: 4758 Int = 
Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break; 4759 case NEON::BI__builtin_neon_vqtbx3_v: 4760 case NEON::BI__builtin_neon_vqtbx3q_v: 4761 Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break; 4762 case NEON::BI__builtin_neon_vqtbx4_v: 4763 case NEON::BI__builtin_neon_vqtbx4q_v: 4764 Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break; 4765 } 4766 } 4767 4768 if (!Int) 4769 return nullptr; 4770 4771 Function *F = CGF.CGM.getIntrinsic(Int, Ty); 4772 return CGF.EmitNeonCall(F, Ops, s); 4773 } 4774 4775 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) { 4776 llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4); 4777 Op = Builder.CreateBitCast(Op, Int16Ty); 4778 Value *V = UndefValue::get(VTy); 4779 llvm::Constant *CI = ConstantInt::get(SizeTy, 0); 4780 Op = Builder.CreateInsertElement(V, Op, CI); 4781 return Op; 4782 } 4783 4784 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, 4785 const CallExpr *E) { 4786 unsigned HintID = static_cast<unsigned>(-1); 4787 switch (BuiltinID) { 4788 default: break; 4789 case AArch64::BI__builtin_arm_nop: 4790 HintID = 0; 4791 break; 4792 case AArch64::BI__builtin_arm_yield: 4793 HintID = 1; 4794 break; 4795 case AArch64::BI__builtin_arm_wfe: 4796 HintID = 2; 4797 break; 4798 case AArch64::BI__builtin_arm_wfi: 4799 HintID = 3; 4800 break; 4801 case AArch64::BI__builtin_arm_sev: 4802 HintID = 4; 4803 break; 4804 case AArch64::BI__builtin_arm_sevl: 4805 HintID = 5; 4806 break; 4807 } 4808 4809 if (HintID != static_cast<unsigned>(-1)) { 4810 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint); 4811 return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID)); 4812 } 4813 4814 if (BuiltinID == AArch64::BI__builtin_arm_prefetch) { 4815 Value *Address = EmitScalarExpr(E->getArg(0)); 4816 Value *RW = EmitScalarExpr(E->getArg(1)); 4817 Value *CacheLevel = EmitScalarExpr(E->getArg(2)); 4818 Value *RetentionPolicy = EmitScalarExpr(E->getArg(3)); 4819 Value *IsData = EmitScalarExpr(E->getArg(4)); 4820 4821 Value *Locality = nullptr; 4822 if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) { 4823 // Temporal fetch, needs to convert cache level to locality. 4824 Locality = llvm::ConstantInt::get(Int32Ty, 4825 -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3); 4826 } else { 4827 // Streaming fetch. 4828 Locality = llvm::ConstantInt::get(Int32Ty, 0); 4829 } 4830 4831 // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify 4832 // PLDL3STRM or PLDL2STRM. 
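// Mapping used above: a temporal fetch turns the cache-level argument into
// llvm.prefetch locality (3 - level), while a streaming fetch becomes
// locality 0.  For example (illustrative only, assuming p is a char*):
//   __builtin_arm_prefetch(p, /*rw=*/0, /*level=*/0, /*retention=*/0,
//                          /*data=*/1)
// lowers to roughly
//   call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)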
4833 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 4834 return Builder.CreateCall(F, {Address, RW, Locality, IsData}); 4835 } 4836 4837 if (BuiltinID == AArch64::BI__builtin_arm_rbit) { 4838 assert((getContext().getTypeSize(E->getType()) == 32) && 4839 "rbit of unusual size!"); 4840 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4841 return Builder.CreateCall( 4842 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4843 } 4844 if (BuiltinID == AArch64::BI__builtin_arm_rbit64) { 4845 assert((getContext().getTypeSize(E->getType()) == 64) && 4846 "rbit of unusual size!"); 4847 llvm::Value *Arg = EmitScalarExpr(E->getArg(0)); 4848 return Builder.CreateCall( 4849 CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit"); 4850 } 4851 4852 if (BuiltinID == AArch64::BI__clear_cache) { 4853 assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); 4854 const FunctionDecl *FD = E->getDirectCallee(); 4855 Value *Ops[2]; 4856 for (unsigned i = 0; i < 2; i++) 4857 Ops[i] = EmitScalarExpr(E->getArg(i)); 4858 llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType()); 4859 llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty); 4860 StringRef Name = FD->getName(); 4861 return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops); 4862 } 4863 4864 if ((BuiltinID == AArch64::BI__builtin_arm_ldrex || 4865 BuiltinID == AArch64::BI__builtin_arm_ldaex) && 4866 getContext().getTypeSize(E->getType()) == 128) { 4867 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4868 ? Intrinsic::aarch64_ldaxp 4869 : Intrinsic::aarch64_ldxp); 4870 4871 Value *LdPtr = EmitScalarExpr(E->getArg(0)); 4872 Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy), 4873 "ldxp"); 4874 4875 Value *Val0 = Builder.CreateExtractValue(Val, 1); 4876 Value *Val1 = Builder.CreateExtractValue(Val, 0); 4877 llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128); 4878 Val0 = Builder.CreateZExt(Val0, Int128Ty); 4879 Val1 = Builder.CreateZExt(Val1, Int128Ty); 4880 4881 Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64); 4882 Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */); 4883 Val = Builder.CreateOr(Val, Val1); 4884 return Builder.CreateBitCast(Val, ConvertType(E->getType())); 4885 } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex || 4886 BuiltinID == AArch64::BI__builtin_arm_ldaex) { 4887 Value *LoadAddr = EmitScalarExpr(E->getArg(0)); 4888 4889 QualType Ty = E->getType(); 4890 llvm::Type *RealResTy = ConvertType(Ty); 4891 llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(), 4892 getContext().getTypeSize(Ty)); 4893 LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo()); 4894 4895 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex 4896 ? Intrinsic::aarch64_ldaxr 4897 : Intrinsic::aarch64_ldxr, 4898 LoadAddr->getType()); 4899 Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr"); 4900 4901 if (RealResTy->isPointerTy()) 4902 return Builder.CreateIntToPtr(Val, RealResTy); 4903 4904 Val = Builder.CreateTruncOrBitCast(Val, IntResTy); 4905 return Builder.CreateBitCast(Val, RealResTy); 4906 } 4907 4908 if ((BuiltinID == AArch64::BI__builtin_arm_strex || 4909 BuiltinID == AArch64::BI__builtin_arm_stlex) && 4910 getContext().getTypeSize(E->getArg(0)->getType()) == 128) { 4911 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4912 ? 
Intrinsic::aarch64_stlxp 4913 : Intrinsic::aarch64_stxp); 4914 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 4915 4916 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 4917 EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true); 4918 4919 Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy)); 4920 llvm::Value *Val = Builder.CreateLoad(Tmp); 4921 4922 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4923 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4924 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 4925 Int8PtrTy); 4926 return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp"); 4927 } 4928 4929 if (BuiltinID == AArch64::BI__builtin_arm_strex || 4930 BuiltinID == AArch64::BI__builtin_arm_stlex) { 4931 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4932 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4933 4934 QualType Ty = E->getArg(0)->getType(); 4935 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4936 getContext().getTypeSize(Ty)); 4937 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4938 4939 if (StoreVal->getType()->isPointerTy()) 4940 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 4941 else { 4942 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 4943 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 4944 } 4945 4946 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4947 ? Intrinsic::aarch64_stlxr 4948 : Intrinsic::aarch64_stxr, 4949 StoreAddr->getType()); 4950 return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr"); 4951 } 4952 4953 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 4954 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 4955 return Builder.CreateCall(F); 4956 } 4957 4958 // CRC32 4959 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4960 switch (BuiltinID) { 4961 case AArch64::BI__builtin_arm_crc32b: 4962 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 4963 case AArch64::BI__builtin_arm_crc32cb: 4964 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 4965 case AArch64::BI__builtin_arm_crc32h: 4966 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 4967 case AArch64::BI__builtin_arm_crc32ch: 4968 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 4969 case AArch64::BI__builtin_arm_crc32w: 4970 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 4971 case AArch64::BI__builtin_arm_crc32cw: 4972 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 4973 case AArch64::BI__builtin_arm_crc32d: 4974 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 4975 case AArch64::BI__builtin_arm_crc32cd: 4976 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 4977 } 4978 4979 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4980 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4981 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4982 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4983 4984 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 4985 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 4986 4987 return Builder.CreateCall(F, {Arg0, Arg1}); 4988 } 4989 4990 if (BuiltinID == AArch64::BI__builtin_arm_rsr || 4991 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 4992 BuiltinID == AArch64::BI__builtin_arm_rsrp || 4993 BuiltinID == AArch64::BI__builtin_arm_wsr || 4994 BuiltinID == AArch64::BI__builtin_arm_wsr64 || 4995 BuiltinID == AArch64::BI__builtin_arm_wsrp) { 4996 4997 bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr || 4998 BuiltinID == AArch64::BI__builtin_arm_rsr64 || 4999 BuiltinID == 
AArch64::BI__builtin_arm_rsrp; 5000 5001 bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp || 5002 BuiltinID == AArch64::BI__builtin_arm_wsrp; 5003 5004 bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr && 5005 BuiltinID != AArch64::BI__builtin_arm_wsr; 5006 5007 llvm::Type *ValueType; 5008 llvm::Type *RegisterType = Int64Ty; 5009 if (IsPointerBuiltin) { 5010 ValueType = VoidPtrTy; 5011 } else if (Is64Bit) { 5012 ValueType = Int64Ty; 5013 } else { 5014 ValueType = Int32Ty; 5015 } 5016 5017 return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead); 5018 } 5019 5020 // Find out if any arguments are required to be integer constant 5021 // expressions. 5022 unsigned ICEArguments = 0; 5023 ASTContext::GetBuiltinTypeError Error; 5024 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); 5025 assert(Error == ASTContext::GE_None && "Should not codegen an error"); 5026 5027 llvm::SmallVector<Value*, 4> Ops; 5028 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) { 5029 if ((ICEArguments & (1 << i)) == 0) { 5030 Ops.push_back(EmitScalarExpr(E->getArg(i))); 5031 } else { 5032 // If this is required to be a constant, constant fold it so that we know 5033 // that the generated intrinsic gets a ConstantInt. 5034 llvm::APSInt Result; 5035 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); 5036 assert(IsConst && "Constant arg isn't actually constant?"); 5037 (void)IsConst; 5038 Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); 5039 } 5040 } 5041 5042 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 5043 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 5044 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 5045 5046 if (Builtin) { 5047 Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1))); 5048 Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E); 5049 assert(Result && "SISD intrinsic should have been handled"); 5050 return Result; 5051 } 5052 5053 llvm::APSInt Result; 5054 const Expr *Arg = E->getArg(E->getNumArgs()-1); 5055 NeonTypeFlags Type(0); 5056 if (Arg->isIntegerConstantExpr(Result, getContext())) 5057 // Determine the type of this overloaded NEON intrinsic. 5058 Type = NeonTypeFlags(Result.getZExtValue()); 5059 5060 bool usgn = Type.isUnsigned(); 5061 bool quad = Type.isQuad(); 5062 5063 // Handle non-overloaded intrinsics first. 5064 switch (BuiltinID) { 5065 default: break; 5066 case NEON::BI__builtin_neon_vldrq_p128: { 5067 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5068 Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy); 5069 return Builder.CreateDefaultAlignedLoad(Ptr); 5070 } 5071 case NEON::BI__builtin_neon_vstrq_p128: { 5072 llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128); 5073 Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); 5074 return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); 5075 } 5076 case NEON::BI__builtin_neon_vcvts_u32_f32: 5077 case NEON::BI__builtin_neon_vcvtd_u64_f64: 5078 usgn = true; 5079 // FALL THROUGH 5080 case NEON::BI__builtin_neon_vcvts_s32_f32: 5081 case NEON::BI__builtin_neon_vcvtd_s64_f64: { 5082 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5083 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5084 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5085 llvm::Type *FTy = Is64 ? 
DoubleTy : FloatTy; 5086 Ops[0] = Builder.CreateBitCast(Ops[0], FTy); 5087 if (usgn) 5088 return Builder.CreateFPToUI(Ops[0], InTy); 5089 return Builder.CreateFPToSI(Ops[0], InTy); 5090 } 5091 case NEON::BI__builtin_neon_vcvts_f32_u32: 5092 case NEON::BI__builtin_neon_vcvtd_f64_u64: 5093 usgn = true; 5094 // FALL THROUGH 5095 case NEON::BI__builtin_neon_vcvts_f32_s32: 5096 case NEON::BI__builtin_neon_vcvtd_f64_s64: { 5097 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5098 bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; 5099 llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; 5100 llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; 5101 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5102 if (usgn) 5103 return Builder.CreateUIToFP(Ops[0], FTy); 5104 return Builder.CreateSIToFP(Ops[0], FTy); 5105 } 5106 case NEON::BI__builtin_neon_vpaddd_s64: { 5107 llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2); 5108 Value *Vec = EmitScalarExpr(E->getArg(0)); 5109 // The vector is v2f64, so make sure it's bitcast to that. 5110 Vec = Builder.CreateBitCast(Vec, Ty, "v2i64"); 5111 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5112 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5113 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5114 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5115 // Pairwise addition of a v2f64 into a scalar f64. 5116 return Builder.CreateAdd(Op0, Op1, "vpaddd"); 5117 } 5118 case NEON::BI__builtin_neon_vpaddd_f64: { 5119 llvm::Type *Ty = 5120 llvm::VectorType::get(DoubleTy, 2); 5121 Value *Vec = EmitScalarExpr(E->getArg(0)); 5122 // The vector is v2f64, so make sure it's bitcast to that. 5123 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 5124 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5125 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5126 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5127 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5128 // Pairwise addition of a v2f64 into a scalar f64. 5129 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5130 } 5131 case NEON::BI__builtin_neon_vpadds_f32: { 5132 llvm::Type *Ty = 5133 llvm::VectorType::get(FloatTy, 2); 5134 Value *Vec = EmitScalarExpr(E->getArg(0)); 5135 // The vector is v2f32, so make sure it's bitcast to that. 5136 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 5137 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 5138 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 5139 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 5140 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 5141 // Pairwise addition of a v2f32 into a scalar f32. 
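// (Effectively vpadds_f32(v) == v[0] + v[1]; like the vpaddd_s64/f64 cases
// above, the scalar pairwise add is open-coded as two lane extracts plus a
// scalar add rather than going through a pairwise-add intrinsic.)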
5142 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 5143 } 5144 case NEON::BI__builtin_neon_vceqzd_s64: 5145 case NEON::BI__builtin_neon_vceqzd_f64: 5146 case NEON::BI__builtin_neon_vceqzs_f32: 5147 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5148 return EmitAArch64CompareBuiltinExpr( 5149 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5150 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 5151 case NEON::BI__builtin_neon_vcgezd_s64: 5152 case NEON::BI__builtin_neon_vcgezd_f64: 5153 case NEON::BI__builtin_neon_vcgezs_f32: 5154 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5155 return EmitAArch64CompareBuiltinExpr( 5156 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5157 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 5158 case NEON::BI__builtin_neon_vclezd_s64: 5159 case NEON::BI__builtin_neon_vclezd_f64: 5160 case NEON::BI__builtin_neon_vclezs_f32: 5161 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5162 return EmitAArch64CompareBuiltinExpr( 5163 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5164 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 5165 case NEON::BI__builtin_neon_vcgtzd_s64: 5166 case NEON::BI__builtin_neon_vcgtzd_f64: 5167 case NEON::BI__builtin_neon_vcgtzs_f32: 5168 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5169 return EmitAArch64CompareBuiltinExpr( 5170 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5171 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 5172 case NEON::BI__builtin_neon_vcltzd_s64: 5173 case NEON::BI__builtin_neon_vcltzd_f64: 5174 case NEON::BI__builtin_neon_vcltzs_f32: 5175 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5176 return EmitAArch64CompareBuiltinExpr( 5177 Ops[0], ConvertType(E->getCallReturnType(getContext())), 5178 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 5179 5180 case NEON::BI__builtin_neon_vceqzd_u64: { 5181 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5182 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5183 Ops[0] = 5184 Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty)); 5185 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd"); 5186 } 5187 case NEON::BI__builtin_neon_vceqd_f64: 5188 case NEON::BI__builtin_neon_vcled_f64: 5189 case NEON::BI__builtin_neon_vcltd_f64: 5190 case NEON::BI__builtin_neon_vcged_f64: 5191 case NEON::BI__builtin_neon_vcgtd_f64: { 5192 llvm::CmpInst::Predicate P; 5193 switch (BuiltinID) { 5194 default: llvm_unreachable("missing builtin ID in switch!"); 5195 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 5196 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 5197 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 5198 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 5199 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 5200 } 5201 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5202 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5203 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5204 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5205 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 5206 } 5207 case NEON::BI__builtin_neon_vceqs_f32: 5208 case NEON::BI__builtin_neon_vcles_f32: 5209 case NEON::BI__builtin_neon_vclts_f32: 5210 case NEON::BI__builtin_neon_vcges_f32: 5211 case NEON::BI__builtin_neon_vcgts_f32: { 5212 llvm::CmpInst::Predicate P; 5213 switch (BuiltinID) { 5214 default: llvm_unreachable("missing builtin ID in switch!"); 5215 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; 
break; 5216 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 5217 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 5218 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 5219 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 5220 } 5221 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5222 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 5223 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 5224 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 5225 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 5226 } 5227 case NEON::BI__builtin_neon_vceqd_s64: 5228 case NEON::BI__builtin_neon_vceqd_u64: 5229 case NEON::BI__builtin_neon_vcgtd_s64: 5230 case NEON::BI__builtin_neon_vcgtd_u64: 5231 case NEON::BI__builtin_neon_vcltd_s64: 5232 case NEON::BI__builtin_neon_vcltd_u64: 5233 case NEON::BI__builtin_neon_vcged_u64: 5234 case NEON::BI__builtin_neon_vcged_s64: 5235 case NEON::BI__builtin_neon_vcled_u64: 5236 case NEON::BI__builtin_neon_vcled_s64: { 5237 llvm::CmpInst::Predicate P; 5238 switch (BuiltinID) { 5239 default: llvm_unreachable("missing builtin ID in switch!"); 5240 case NEON::BI__builtin_neon_vceqd_s64: 5241 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 5242 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 5243 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 5244 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 5245 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 5246 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 5247 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 5248 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 5249 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 5250 } 5251 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5252 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5253 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5254 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 5255 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 5256 } 5257 case NEON::BI__builtin_neon_vtstd_s64: 5258 case NEON::BI__builtin_neon_vtstd_u64: { 5259 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5260 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 5261 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5262 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 5263 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 5264 llvm::Constant::getNullValue(Int64Ty)); 5265 return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd"); 5266 } 5267 case NEON::BI__builtin_neon_vset_lane_i8: 5268 case NEON::BI__builtin_neon_vset_lane_i16: 5269 case NEON::BI__builtin_neon_vset_lane_i32: 5270 case NEON::BI__builtin_neon_vset_lane_i64: 5271 case NEON::BI__builtin_neon_vset_lane_f32: 5272 case NEON::BI__builtin_neon_vsetq_lane_i8: 5273 case NEON::BI__builtin_neon_vsetq_lane_i16: 5274 case NEON::BI__builtin_neon_vsetq_lane_i32: 5275 case NEON::BI__builtin_neon_vsetq_lane_i64: 5276 case NEON::BI__builtin_neon_vsetq_lane_f32: 5277 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5278 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5279 case NEON::BI__builtin_neon_vset_lane_f64: 5280 // The vector type needs a cast for the v1f64 variant. 
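// For example (illustrative), vset_lane_f64(d, v, 0) inserts the scalar d
// into lane 0 of the one-element vector v; the bitcast below just retypes the
// vector operand as <1 x double> before the insertelement is emitted.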
5281 Ops[1] = Builder.CreateBitCast(Ops[1], 5282 llvm::VectorType::get(DoubleTy, 1)); 5283 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5284 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5285 case NEON::BI__builtin_neon_vsetq_lane_f64: 5286 // The vector type needs a cast for the v2f64 variant. 5287 Ops[1] = Builder.CreateBitCast(Ops[1], 5288 llvm::VectorType::get(DoubleTy, 2)); 5289 Ops.push_back(EmitScalarExpr(E->getArg(2))); 5290 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 5291 5292 case NEON::BI__builtin_neon_vget_lane_i8: 5293 case NEON::BI__builtin_neon_vdupb_lane_i8: 5294 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8)); 5295 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5296 "vget_lane"); 5297 case NEON::BI__builtin_neon_vgetq_lane_i8: 5298 case NEON::BI__builtin_neon_vdupb_laneq_i8: 5299 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16)); 5300 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5301 "vgetq_lane"); 5302 case NEON::BI__builtin_neon_vget_lane_i16: 5303 case NEON::BI__builtin_neon_vduph_lane_i16: 5304 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4)); 5305 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5306 "vget_lane"); 5307 case NEON::BI__builtin_neon_vgetq_lane_i16: 5308 case NEON::BI__builtin_neon_vduph_laneq_i16: 5309 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8)); 5310 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5311 "vgetq_lane"); 5312 case NEON::BI__builtin_neon_vget_lane_i32: 5313 case NEON::BI__builtin_neon_vdups_lane_i32: 5314 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2)); 5315 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5316 "vget_lane"); 5317 case NEON::BI__builtin_neon_vdups_lane_f32: 5318 Ops[0] = Builder.CreateBitCast(Ops[0], 5319 llvm::VectorType::get(FloatTy, 2)); 5320 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5321 "vdups_lane"); 5322 case NEON::BI__builtin_neon_vgetq_lane_i32: 5323 case NEON::BI__builtin_neon_vdups_laneq_i32: 5324 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); 5325 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5326 "vgetq_lane"); 5327 case NEON::BI__builtin_neon_vget_lane_i64: 5328 case NEON::BI__builtin_neon_vdupd_lane_i64: 5329 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1)); 5330 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5331 "vget_lane"); 5332 case NEON::BI__builtin_neon_vdupd_lane_f64: 5333 Ops[0] = Builder.CreateBitCast(Ops[0], 5334 llvm::VectorType::get(DoubleTy, 1)); 5335 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5336 "vdupd_lane"); 5337 case NEON::BI__builtin_neon_vgetq_lane_i64: 5338 case NEON::BI__builtin_neon_vdupd_laneq_i64: 5339 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); 5340 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5341 "vgetq_lane"); 5342 case NEON::BI__builtin_neon_vget_lane_f32: 5343 Ops[0] = Builder.CreateBitCast(Ops[0], 5344 llvm::VectorType::get(FloatTy, 2)); 5345 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5346 "vget_lane"); 5347 case NEON::BI__builtin_neon_vget_lane_f64: 5348 Ops[0] = Builder.CreateBitCast(Ops[0], 5349 
llvm::VectorType::get(DoubleTy, 1)); 5350 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5351 "vget_lane"); 5352 case NEON::BI__builtin_neon_vgetq_lane_f32: 5353 case NEON::BI__builtin_neon_vdups_laneq_f32: 5354 Ops[0] = Builder.CreateBitCast(Ops[0], 5355 llvm::VectorType::get(FloatTy, 4)); 5356 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5357 "vgetq_lane"); 5358 case NEON::BI__builtin_neon_vgetq_lane_f64: 5359 case NEON::BI__builtin_neon_vdupd_laneq_f64: 5360 Ops[0] = Builder.CreateBitCast(Ops[0], 5361 llvm::VectorType::get(DoubleTy, 2)); 5362 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 5363 "vgetq_lane"); 5364 case NEON::BI__builtin_neon_vaddd_s64: 5365 case NEON::BI__builtin_neon_vaddd_u64: 5366 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 5367 case NEON::BI__builtin_neon_vsubd_s64: 5368 case NEON::BI__builtin_neon_vsubd_u64: 5369 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 5370 case NEON::BI__builtin_neon_vqdmlalh_s16: 5371 case NEON::BI__builtin_neon_vqdmlslh_s16: { 5372 SmallVector<Value *, 2> ProductOps; 5373 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5374 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 5375 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5376 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5377 ProductOps, "vqdmlXl"); 5378 Constant *CI = ConstantInt::get(SizeTy, 0); 5379 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5380 5381 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 5382 ? Intrinsic::aarch64_neon_sqadd 5383 : Intrinsic::aarch64_neon_sqsub; 5384 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 5385 } 5386 case NEON::BI__builtin_neon_vqshlud_n_s64: { 5387 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5388 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5389 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 5390 Ops, "vqshlu_n"); 5391 } 5392 case NEON::BI__builtin_neon_vqshld_n_u64: 5393 case NEON::BI__builtin_neon_vqshld_n_s64: { 5394 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 5395 ? Intrinsic::aarch64_neon_uqshl 5396 : Intrinsic::aarch64_neon_sqshl; 5397 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5398 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 5399 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 5400 } 5401 case NEON::BI__builtin_neon_vrshrd_n_u64: 5402 case NEON::BI__builtin_neon_vrshrd_n_s64: { 5403 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 5404 ? Intrinsic::aarch64_neon_urshl 5405 : Intrinsic::aarch64_neon_srshl; 5406 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5407 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 5408 Ops[1] = ConstantInt::get(Int64Ty, -SV); 5409 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 5410 } 5411 case NEON::BI__builtin_neon_vrsrad_n_u64: 5412 case NEON::BI__builtin_neon_vrsrad_n_s64: { 5413 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 5414 ? 
Intrinsic::aarch64_neon_urshl 5415 : Intrinsic::aarch64_neon_srshl; 5416 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 5417 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 5418 Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty), 5419 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)}); 5420 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 5421 } 5422 case NEON::BI__builtin_neon_vshld_n_s64: 5423 case NEON::BI__builtin_neon_vshld_n_u64: { 5424 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5425 return Builder.CreateShl( 5426 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 5427 } 5428 case NEON::BI__builtin_neon_vshrd_n_s64: { 5429 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5430 return Builder.CreateAShr( 5431 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5432 Amt->getZExtValue())), 5433 "shrd_n"); 5434 } 5435 case NEON::BI__builtin_neon_vshrd_n_u64: { 5436 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 5437 uint64_t ShiftAmt = Amt->getZExtValue(); 5438 // Right-shifting an unsigned value by its size yields 0. 5439 if (ShiftAmt == 64) 5440 return ConstantInt::get(Int64Ty, 0); 5441 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 5442 "shrd_n"); 5443 } 5444 case NEON::BI__builtin_neon_vsrad_n_s64: { 5445 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5446 Ops[1] = Builder.CreateAShr( 5447 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 5448 Amt->getZExtValue())), 5449 "shrd_n"); 5450 return Builder.CreateAdd(Ops[0], Ops[1]); 5451 } 5452 case NEON::BI__builtin_neon_vsrad_n_u64: { 5453 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 5454 uint64_t ShiftAmt = Amt->getZExtValue(); 5455 // Right-shifting an unsigned value by its size yields 0. 5456 // As Op + 0 = Op, return Ops[0] directly. 5457 if (ShiftAmt == 64) 5458 return Ops[0]; 5459 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 5460 "shrd_n"); 5461 return Builder.CreateAdd(Ops[0], Ops[1]); 5462 } 5463 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 5464 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 5465 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 5466 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 5467 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5468 "lane"); 5469 SmallVector<Value *, 2> ProductOps; 5470 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 5471 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 5472 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 5473 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 5474 ProductOps, "vqdmlXl"); 5475 Constant *CI = ConstantInt::get(SizeTy, 0); 5476 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 5477 Ops.pop_back(); 5478 5479 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 5480 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 5481 ? 
Intrinsic::aarch64_neon_sqadd 5482 : Intrinsic::aarch64_neon_sqsub; 5483 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 5484 } 5485 case NEON::BI__builtin_neon_vqdmlals_s32: 5486 case NEON::BI__builtin_neon_vqdmlsls_s32: { 5487 SmallVector<Value *, 2> ProductOps; 5488 ProductOps.push_back(Ops[1]); 5489 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 5490 Ops[1] = 5491 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5492 ProductOps, "vqdmlXl"); 5493 5494 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 5495 ? Intrinsic::aarch64_neon_sqadd 5496 : Intrinsic::aarch64_neon_sqsub; 5497 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 5498 } 5499 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 5500 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 5501 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 5502 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 5503 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 5504 "lane"); 5505 SmallVector<Value *, 2> ProductOps; 5506 ProductOps.push_back(Ops[1]); 5507 ProductOps.push_back(Ops[2]); 5508 Ops[1] = 5509 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 5510 ProductOps, "vqdmlXl"); 5511 Ops.pop_back(); 5512 5513 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 5514 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 5515 ? Intrinsic::aarch64_neon_sqadd 5516 : Intrinsic::aarch64_neon_sqsub; 5517 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 5518 } 5519 } 5520 5521 llvm::VectorType *VTy = GetNeonType(this, Type); 5522 llvm::Type *Ty = VTy; 5523 if (!Ty) 5524 return nullptr; 5525 5526 // Not all intrinsics handled by the common case work for AArch64 yet, so only 5527 // defer to common code if it's been added to our special map. 5528 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 5529 AArch64SIMDIntrinsicsProvenSorted); 5530 5531 if (Builtin) 5532 return EmitCommonNeonBuiltinExpr( 5533 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 5534 Builtin->NameHint, Builtin->TypeModifier, E, Ops, 5535 /*never use addresses*/ Address::invalid(), Address::invalid()); 5536 5537 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 5538 return V; 5539 5540 unsigned Int; 5541 switch (BuiltinID) { 5542 default: return nullptr; 5543 case NEON::BI__builtin_neon_vbsl_v: 5544 case NEON::BI__builtin_neon_vbslq_v: { 5545 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 5546 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 5547 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 5548 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 5549 5550 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 5551 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 5552 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 5553 return Builder.CreateBitCast(Ops[0], Ty); 5554 } 5555 case NEON::BI__builtin_neon_vfma_lane_v: 5556 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 5557 // The ARM builtins (and instructions) have the addend as the first 5558 // operand, but the 'fma' intrinsics have it last. Swap it around here. 5559 Value *Addend = Ops[0]; 5560 Value *Multiplicand = Ops[1]; 5561 Value *LaneSource = Ops[2]; 5562 Ops[0] = Multiplicand; 5563 Ops[1] = LaneSource; 5564 Ops[2] = Addend; 5565 5566 // Now adjust things to handle the lane access. 
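    // For the quad-register form the lane source is the 64-bit (half-width)
    // vector type, so e.g. vfmaq_lane_f32 pulls its lane out of a <2 x float>
    // and the splat shuffle below broadcasts it across all result lanes.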
5567 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 5568 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 5569 VTy; 5570 llvm::Constant *cst = cast<Constant>(Ops[3]); 5571 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 5572 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 5573 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 5574 5575 Ops.pop_back(); 5576 Int = Intrinsic::fma; 5577 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 5578 } 5579 case NEON::BI__builtin_neon_vfma_laneq_v: { 5580 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 5581 // v1f64 fma should be mapped to Neon scalar f64 fma 5582 if (VTy && VTy->getElementType() == DoubleTy) { 5583 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5584 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 5585 llvm::Type *VTy = GetNeonType(this, 5586 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 5587 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 5588 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5589 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 5590 Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5591 return Builder.CreateBitCast(Result, Ty); 5592 } 5593 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5594 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5595 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5596 5597 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 5598 VTy->getNumElements() * 2); 5599 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 5600 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 5601 cast<ConstantInt>(Ops[3])); 5602 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 5603 5604 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5605 } 5606 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 5607 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5608 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5609 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 5610 5611 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 5612 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 5613 return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]}); 5614 } 5615 case NEON::BI__builtin_neon_vfmas_lane_f32: 5616 case NEON::BI__builtin_neon_vfmas_laneq_f32: 5617 case NEON::BI__builtin_neon_vfmad_lane_f64: 5618 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 5619 Ops.push_back(EmitScalarExpr(E->getArg(3))); 5620 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 5621 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 5622 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 5623 return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]}); 5624 } 5625 case NEON::BI__builtin_neon_vmull_v: 5626 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5627 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 5628 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 5629 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 5630 case NEON::BI__builtin_neon_vmax_v: 5631 case NEON::BI__builtin_neon_vmaxq_v: 5632 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 5633 Int = usgn ? 
Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
  case NEON::BI__builtin_neon_vmin_v:
  case NEON::BI__builtin_neon_vminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
  case NEON::BI__builtin_neon_vabd_v:
  case NEON::BI__builtin_neon_vabdq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    unsigned ArgElts = VTy->getNumElements();
    llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
    unsigned BitWidth = EltTy->getBitWidth();
    llvm::Type *ArgTy = llvm::VectorType::get(
        llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
    llvm::Type* Tys[2] = { VTy, ArgTy };
    Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
    SmallVector<llvm::Value*, 1> TmpOps;
    TmpOps.push_back(Ops[1]);
    Function *F = CGM.getIntrinsic(Int, Tys);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
    llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
    return Builder.CreateAdd(tmp, addend);
  }
  case NEON::BI__builtin_neon_vpmin_v:
  case NEON::BI__builtin_neon_vpminq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
    if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
  case NEON::BI__builtin_neon_vpmax_v:
  case NEON::BI__builtin_neon_vpmaxq_v:
    // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
    Int = usgn ?
Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 5674 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 5675 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 5676 case NEON::BI__builtin_neon_vminnm_v: 5677 case NEON::BI__builtin_neon_vminnmq_v: 5678 Int = Intrinsic::aarch64_neon_fminnm; 5679 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 5680 case NEON::BI__builtin_neon_vmaxnm_v: 5681 case NEON::BI__builtin_neon_vmaxnmq_v: 5682 Int = Intrinsic::aarch64_neon_fmaxnm; 5683 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 5684 case NEON::BI__builtin_neon_vrecpss_f32: { 5685 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5686 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy), 5687 Ops, "vrecps"); 5688 } 5689 case NEON::BI__builtin_neon_vrecpsd_f64: { 5690 Ops.push_back(EmitScalarExpr(E->getArg(1))); 5691 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy), 5692 Ops, "vrecps"); 5693 } 5694 case NEON::BI__builtin_neon_vqshrun_n_v: 5695 Int = Intrinsic::aarch64_neon_sqshrun; 5696 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 5697 case NEON::BI__builtin_neon_vqrshrun_n_v: 5698 Int = Intrinsic::aarch64_neon_sqrshrun; 5699 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 5700 case NEON::BI__builtin_neon_vqshrn_n_v: 5701 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 5702 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 5703 case NEON::BI__builtin_neon_vrshrn_n_v: 5704 Int = Intrinsic::aarch64_neon_rshrn; 5705 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 5706 case NEON::BI__builtin_neon_vqrshrn_n_v: 5707 Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 5708 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 5709 case NEON::BI__builtin_neon_vrnda_v: 5710 case NEON::BI__builtin_neon_vrndaq_v: { 5711 Int = Intrinsic::round; 5712 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 5713 } 5714 case NEON::BI__builtin_neon_vrndi_v: 5715 case NEON::BI__builtin_neon_vrndiq_v: { 5716 Int = Intrinsic::nearbyint; 5717 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 5718 } 5719 case NEON::BI__builtin_neon_vrndm_v: 5720 case NEON::BI__builtin_neon_vrndmq_v: { 5721 Int = Intrinsic::floor; 5722 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 5723 } 5724 case NEON::BI__builtin_neon_vrndn_v: 5725 case NEON::BI__builtin_neon_vrndnq_v: { 5726 Int = Intrinsic::aarch64_neon_frintn; 5727 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 5728 } 5729 case NEON::BI__builtin_neon_vrndp_v: 5730 case NEON::BI__builtin_neon_vrndpq_v: { 5731 Int = Intrinsic::ceil; 5732 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 5733 } 5734 case NEON::BI__builtin_neon_vrndx_v: 5735 case NEON::BI__builtin_neon_vrndxq_v: { 5736 Int = Intrinsic::rint; 5737 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 5738 } 5739 case NEON::BI__builtin_neon_vrnd_v: 5740 case NEON::BI__builtin_neon_vrndq_v: { 5741 Int = Intrinsic::trunc; 5742 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 5743 } 5744 case NEON::BI__builtin_neon_vceqz_v: 5745 case NEON::BI__builtin_neon_vceqzq_v: 5746 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 5747 ICmpInst::ICMP_EQ, "vceqz"); 5748 case NEON::BI__builtin_neon_vcgez_v: 5749 case NEON::BI__builtin_neon_vcgezq_v: 5750 return 
EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 5751 ICmpInst::ICMP_SGE, "vcgez"); 5752 case NEON::BI__builtin_neon_vclez_v: 5753 case NEON::BI__builtin_neon_vclezq_v: 5754 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 5755 ICmpInst::ICMP_SLE, "vclez"); 5756 case NEON::BI__builtin_neon_vcgtz_v: 5757 case NEON::BI__builtin_neon_vcgtzq_v: 5758 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 5759 ICmpInst::ICMP_SGT, "vcgtz"); 5760 case NEON::BI__builtin_neon_vcltz_v: 5761 case NEON::BI__builtin_neon_vcltzq_v: 5762 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 5763 ICmpInst::ICMP_SLT, "vcltz"); 5764 case NEON::BI__builtin_neon_vcvt_f64_v: 5765 case NEON::BI__builtin_neon_vcvtq_f64_v: 5766 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5767 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 5768 return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 5769 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 5770 case NEON::BI__builtin_neon_vcvt_f64_f32: { 5771 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 5772 "unexpected vcvt_f64_f32 builtin"); 5773 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 5774 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5775 5776 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 5777 } 5778 case NEON::BI__builtin_neon_vcvt_f32_f64: { 5779 assert(Type.getEltType() == NeonTypeFlags::Float32 && 5780 "unexpected vcvt_f32_f64 builtin"); 5781 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 5782 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5783 5784 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 5785 } 5786 case NEON::BI__builtin_neon_vcvt_s32_v: 5787 case NEON::BI__builtin_neon_vcvt_u32_v: 5788 case NEON::BI__builtin_neon_vcvt_s64_v: 5789 case NEON::BI__builtin_neon_vcvt_u64_v: 5790 case NEON::BI__builtin_neon_vcvtq_s32_v: 5791 case NEON::BI__builtin_neon_vcvtq_u32_v: 5792 case NEON::BI__builtin_neon_vcvtq_s64_v: 5793 case NEON::BI__builtin_neon_vcvtq_u64_v: { 5794 Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); 5795 if (usgn) 5796 return Builder.CreateFPToUI(Ops[0], Ty); 5797 return Builder.CreateFPToSI(Ops[0], Ty); 5798 } 5799 case NEON::BI__builtin_neon_vcvta_s32_v: 5800 case NEON::BI__builtin_neon_vcvtaq_s32_v: 5801 case NEON::BI__builtin_neon_vcvta_u32_v: 5802 case NEON::BI__builtin_neon_vcvtaq_u32_v: 5803 case NEON::BI__builtin_neon_vcvta_s64_v: 5804 case NEON::BI__builtin_neon_vcvtaq_s64_v: 5805 case NEON::BI__builtin_neon_vcvta_u64_v: 5806 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 5807 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 5808 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5809 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 5810 } 5811 case NEON::BI__builtin_neon_vcvtm_s32_v: 5812 case NEON::BI__builtin_neon_vcvtmq_s32_v: 5813 case NEON::BI__builtin_neon_vcvtm_u32_v: 5814 case NEON::BI__builtin_neon_vcvtmq_u32_v: 5815 case NEON::BI__builtin_neon_vcvtm_s64_v: 5816 case NEON::BI__builtin_neon_vcvtmq_s64_v: 5817 case NEON::BI__builtin_neon_vcvtm_u64_v: 5818 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 5819 Int = usgn ? 
Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 5820 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5821 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 5822 } 5823 case NEON::BI__builtin_neon_vcvtn_s32_v: 5824 case NEON::BI__builtin_neon_vcvtnq_s32_v: 5825 case NEON::BI__builtin_neon_vcvtn_u32_v: 5826 case NEON::BI__builtin_neon_vcvtnq_u32_v: 5827 case NEON::BI__builtin_neon_vcvtn_s64_v: 5828 case NEON::BI__builtin_neon_vcvtnq_s64_v: 5829 case NEON::BI__builtin_neon_vcvtn_u64_v: 5830 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 5831 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 5832 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5833 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 5834 } 5835 case NEON::BI__builtin_neon_vcvtp_s32_v: 5836 case NEON::BI__builtin_neon_vcvtpq_s32_v: 5837 case NEON::BI__builtin_neon_vcvtp_u32_v: 5838 case NEON::BI__builtin_neon_vcvtpq_u32_v: 5839 case NEON::BI__builtin_neon_vcvtp_s64_v: 5840 case NEON::BI__builtin_neon_vcvtpq_s64_v: 5841 case NEON::BI__builtin_neon_vcvtp_u64_v: 5842 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 5843 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 5844 llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) }; 5845 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 5846 } 5847 case NEON::BI__builtin_neon_vmulx_v: 5848 case NEON::BI__builtin_neon_vmulxq_v: { 5849 Int = Intrinsic::aarch64_neon_fmulx; 5850 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 5851 } 5852 case NEON::BI__builtin_neon_vmul_lane_v: 5853 case NEON::BI__builtin_neon_vmul_laneq_v: { 5854 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 5855 bool Quad = false; 5856 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 5857 Quad = true; 5858 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5859 llvm::Type *VTy = GetNeonType(this, 5860 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 5861 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5862 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 5863 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 5864 return Builder.CreateBitCast(Result, Ty); 5865 } 5866 case NEON::BI__builtin_neon_vnegd_s64: 5867 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 5868 case NEON::BI__builtin_neon_vpmaxnm_v: 5869 case NEON::BI__builtin_neon_vpmaxnmq_v: { 5870 Int = Intrinsic::aarch64_neon_fmaxnmp; 5871 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 5872 } 5873 case NEON::BI__builtin_neon_vpminnm_v: 5874 case NEON::BI__builtin_neon_vpminnmq_v: { 5875 Int = Intrinsic::aarch64_neon_fminnmp; 5876 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 5877 } 5878 case NEON::BI__builtin_neon_vsqrt_v: 5879 case NEON::BI__builtin_neon_vsqrtq_v: { 5880 Int = Intrinsic::sqrt; 5881 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5882 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 5883 } 5884 case NEON::BI__builtin_neon_vrbit_v: 5885 case NEON::BI__builtin_neon_vrbitq_v: { 5886 Int = Intrinsic::aarch64_neon_rbit; 5887 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 5888 } 5889 case NEON::BI__builtin_neon_vaddv_u8: 5890 // FIXME: These are handled by the AArch64 scalar code. 5891 usgn = true; 5892 // FALLTHROUGH 5893 case NEON::BI__builtin_neon_vaddv_s8: { 5894 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5895 Ty = Int32Ty; 5896 VTy = llvm::VectorType::get(Int8Ty, 8); 5897 llvm::Type *Tys[2] = { Ty, VTy }; 5898 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5899 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5900 return Builder.CreateTrunc(Ops[0], Int8Ty); 5901 } 5902 case NEON::BI__builtin_neon_vaddv_u16: 5903 usgn = true; 5904 // FALLTHROUGH 5905 case NEON::BI__builtin_neon_vaddv_s16: { 5906 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5907 Ty = Int32Ty; 5908 VTy = llvm::VectorType::get(Int16Ty, 4); 5909 llvm::Type *Tys[2] = { Ty, VTy }; 5910 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5911 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5912 return Builder.CreateTrunc(Ops[0], Int16Ty); 5913 } 5914 case NEON::BI__builtin_neon_vaddvq_u8: 5915 usgn = true; 5916 // FALLTHROUGH 5917 case NEON::BI__builtin_neon_vaddvq_s8: { 5918 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5919 Ty = Int32Ty; 5920 VTy = llvm::VectorType::get(Int8Ty, 16); 5921 llvm::Type *Tys[2] = { Ty, VTy }; 5922 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5923 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5924 return Builder.CreateTrunc(Ops[0], Int8Ty); 5925 } 5926 case NEON::BI__builtin_neon_vaddvq_u16: 5927 usgn = true; 5928 // FALLTHROUGH 5929 case NEON::BI__builtin_neon_vaddvq_s16: { 5930 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5931 Ty = Int32Ty; 5932 VTy = llvm::VectorType::get(Int16Ty, 8); 5933 llvm::Type *Tys[2] = { Ty, VTy }; 5934 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5935 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5936 return Builder.CreateTrunc(Ops[0], Int16Ty); 5937 } 5938 case NEON::BI__builtin_neon_vmaxv_u8: { 5939 Int = Intrinsic::aarch64_neon_umaxv; 5940 Ty = Int32Ty; 5941 VTy = llvm::VectorType::get(Int8Ty, 8); 5942 llvm::Type *Tys[2] = { Ty, VTy }; 5943 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5944 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5945 return Builder.CreateTrunc(Ops[0], Int8Ty); 5946 } 5947 case NEON::BI__builtin_neon_vmaxv_u16: { 5948 Int = Intrinsic::aarch64_neon_umaxv; 5949 Ty = Int32Ty; 5950 VTy = llvm::VectorType::get(Int16Ty, 4); 5951 llvm::Type *Tys[2] = { Ty, VTy }; 5952 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5953 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5954 return Builder.CreateTrunc(Ops[0], Int16Ty); 5955 } 5956 case NEON::BI__builtin_neon_vmaxvq_u8: { 5957 Int = Intrinsic::aarch64_neon_umaxv; 5958 Ty = Int32Ty; 5959 VTy = llvm::VectorType::get(Int8Ty, 16); 5960 llvm::Type *Tys[2] = { Ty, VTy }; 5961 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5962 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5963 return Builder.CreateTrunc(Ops[0], Int8Ty); 5964 } 5965 case NEON::BI__builtin_neon_vmaxvq_u16: { 5966 Int = Intrinsic::aarch64_neon_umaxv; 5967 Ty = Int32Ty; 5968 VTy = llvm::VectorType::get(Int16Ty, 8); 5969 llvm::Type *Tys[2] = { Ty, VTy }; 5970 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5971 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5972 return Builder.CreateTrunc(Ops[0], Int16Ty); 5973 } 5974 case NEON::BI__builtin_neon_vmaxv_s8: { 5975 Int = Intrinsic::aarch64_neon_smaxv; 5976 Ty = Int32Ty; 5977 VTy = llvm::VectorType::get(Int8Ty, 8); 5978 llvm::Type *Tys[2] = { Ty, VTy }; 5979 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5980 Ops[0] = 
EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5981 return Builder.CreateTrunc(Ops[0], Int8Ty); 5982 } 5983 case NEON::BI__builtin_neon_vmaxv_s16: { 5984 Int = Intrinsic::aarch64_neon_smaxv; 5985 Ty = Int32Ty; 5986 VTy = llvm::VectorType::get(Int16Ty, 4); 5987 llvm::Type *Tys[2] = { Ty, VTy }; 5988 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5989 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5990 return Builder.CreateTrunc(Ops[0], Int16Ty); 5991 } 5992 case NEON::BI__builtin_neon_vmaxvq_s8: { 5993 Int = Intrinsic::aarch64_neon_smaxv; 5994 Ty = Int32Ty; 5995 VTy = llvm::VectorType::get(Int8Ty, 16); 5996 llvm::Type *Tys[2] = { Ty, VTy }; 5997 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5998 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5999 return Builder.CreateTrunc(Ops[0], Int8Ty); 6000 } 6001 case NEON::BI__builtin_neon_vmaxvq_s16: { 6002 Int = Intrinsic::aarch64_neon_smaxv; 6003 Ty = Int32Ty; 6004 VTy = llvm::VectorType::get(Int16Ty, 8); 6005 llvm::Type *Tys[2] = { Ty, VTy }; 6006 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6007 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 6008 return Builder.CreateTrunc(Ops[0], Int16Ty); 6009 } 6010 case NEON::BI__builtin_neon_vminv_u8: { 6011 Int = Intrinsic::aarch64_neon_uminv; 6012 Ty = Int32Ty; 6013 VTy = llvm::VectorType::get(Int8Ty, 8); 6014 llvm::Type *Tys[2] = { Ty, VTy }; 6015 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6016 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6017 return Builder.CreateTrunc(Ops[0], Int8Ty); 6018 } 6019 case NEON::BI__builtin_neon_vminv_u16: { 6020 Int = Intrinsic::aarch64_neon_uminv; 6021 Ty = Int32Ty; 6022 VTy = llvm::VectorType::get(Int16Ty, 4); 6023 llvm::Type *Tys[2] = { Ty, VTy }; 6024 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6025 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6026 return Builder.CreateTrunc(Ops[0], Int16Ty); 6027 } 6028 case NEON::BI__builtin_neon_vminvq_u8: { 6029 Int = Intrinsic::aarch64_neon_uminv; 6030 Ty = Int32Ty; 6031 VTy = llvm::VectorType::get(Int8Ty, 16); 6032 llvm::Type *Tys[2] = { Ty, VTy }; 6033 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6034 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6035 return Builder.CreateTrunc(Ops[0], Int8Ty); 6036 } 6037 case NEON::BI__builtin_neon_vminvq_u16: { 6038 Int = Intrinsic::aarch64_neon_uminv; 6039 Ty = Int32Ty; 6040 VTy = llvm::VectorType::get(Int16Ty, 8); 6041 llvm::Type *Tys[2] = { Ty, VTy }; 6042 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6043 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6044 return Builder.CreateTrunc(Ops[0], Int16Ty); 6045 } 6046 case NEON::BI__builtin_neon_vminv_s8: { 6047 Int = Intrinsic::aarch64_neon_sminv; 6048 Ty = Int32Ty; 6049 VTy = llvm::VectorType::get(Int8Ty, 8); 6050 llvm::Type *Tys[2] = { Ty, VTy }; 6051 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6052 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6053 return Builder.CreateTrunc(Ops[0], Int8Ty); 6054 } 6055 case NEON::BI__builtin_neon_vminv_s16: { 6056 Int = Intrinsic::aarch64_neon_sminv; 6057 Ty = Int32Ty; 6058 VTy = llvm::VectorType::get(Int16Ty, 4); 6059 llvm::Type *Tys[2] = { Ty, VTy }; 6060 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6061 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6062 return Builder.CreateTrunc(Ops[0], Int16Ty); 6063 } 6064 case NEON::BI__builtin_neon_vminvq_s8: { 6065 Int = Intrinsic::aarch64_neon_sminv; 6066 Ty = Int32Ty; 6067 VTy = 
llvm::VectorType::get(Int8Ty, 16); 6068 llvm::Type *Tys[2] = { Ty, VTy }; 6069 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6070 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6071 return Builder.CreateTrunc(Ops[0], Int8Ty); 6072 } 6073 case NEON::BI__builtin_neon_vminvq_s16: { 6074 Int = Intrinsic::aarch64_neon_sminv; 6075 Ty = Int32Ty; 6076 VTy = llvm::VectorType::get(Int16Ty, 8); 6077 llvm::Type *Tys[2] = { Ty, VTy }; 6078 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6079 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 6080 return Builder.CreateTrunc(Ops[0], Int16Ty); 6081 } 6082 case NEON::BI__builtin_neon_vmul_n_f64: { 6083 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 6084 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 6085 return Builder.CreateFMul(Ops[0], RHS); 6086 } 6087 case NEON::BI__builtin_neon_vaddlv_u8: { 6088 Int = Intrinsic::aarch64_neon_uaddlv; 6089 Ty = Int32Ty; 6090 VTy = llvm::VectorType::get(Int8Ty, 8); 6091 llvm::Type *Tys[2] = { Ty, VTy }; 6092 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6093 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6094 return Builder.CreateTrunc(Ops[0], Int16Ty); 6095 } 6096 case NEON::BI__builtin_neon_vaddlv_u16: { 6097 Int = Intrinsic::aarch64_neon_uaddlv; 6098 Ty = Int32Ty; 6099 VTy = llvm::VectorType::get(Int16Ty, 4); 6100 llvm::Type *Tys[2] = { Ty, VTy }; 6101 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6102 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6103 } 6104 case NEON::BI__builtin_neon_vaddlvq_u8: { 6105 Int = Intrinsic::aarch64_neon_uaddlv; 6106 Ty = Int32Ty; 6107 VTy = llvm::VectorType::get(Int8Ty, 16); 6108 llvm::Type *Tys[2] = { Ty, VTy }; 6109 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6110 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6111 return Builder.CreateTrunc(Ops[0], Int16Ty); 6112 } 6113 case NEON::BI__builtin_neon_vaddlvq_u16: { 6114 Int = Intrinsic::aarch64_neon_uaddlv; 6115 Ty = Int32Ty; 6116 VTy = llvm::VectorType::get(Int16Ty, 8); 6117 llvm::Type *Tys[2] = { Ty, VTy }; 6118 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6119 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6120 } 6121 case NEON::BI__builtin_neon_vaddlv_s8: { 6122 Int = Intrinsic::aarch64_neon_saddlv; 6123 Ty = Int32Ty; 6124 VTy = llvm::VectorType::get(Int8Ty, 8); 6125 llvm::Type *Tys[2] = { Ty, VTy }; 6126 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6127 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6128 return Builder.CreateTrunc(Ops[0], Int16Ty); 6129 } 6130 case NEON::BI__builtin_neon_vaddlv_s16: { 6131 Int = Intrinsic::aarch64_neon_saddlv; 6132 Ty = Int32Ty; 6133 VTy = llvm::VectorType::get(Int16Ty, 4); 6134 llvm::Type *Tys[2] = { Ty, VTy }; 6135 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6136 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6137 } 6138 case NEON::BI__builtin_neon_vaddlvq_s8: { 6139 Int = Intrinsic::aarch64_neon_saddlv; 6140 Ty = Int32Ty; 6141 VTy = llvm::VectorType::get(Int8Ty, 16); 6142 llvm::Type *Tys[2] = { Ty, VTy }; 6143 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6144 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6145 return Builder.CreateTrunc(Ops[0], Int16Ty); 6146 } 6147 case NEON::BI__builtin_neon_vaddlvq_s16: { 6148 Int = Intrinsic::aarch64_neon_saddlv; 6149 Ty = Int32Ty; 6150 VTy = llvm::VectorType::get(Int16Ty, 8); 6151 llvm::Type *Tys[2] = { Ty, VTy }; 6152 Ops.push_back(EmitScalarExpr(E->getArg(0))); 6153 
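    // The i16 add-across-lanes intrinsic already yields a 32-bit result
    // (vaddlvq_s16 returns int32_t), so unlike the i8 variants above no
    // truncation is needed here.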
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 6154 } 6155 case NEON::BI__builtin_neon_vsri_n_v: 6156 case NEON::BI__builtin_neon_vsriq_n_v: { 6157 Int = Intrinsic::aarch64_neon_vsri; 6158 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6159 return EmitNeonCall(Intrin, Ops, "vsri_n"); 6160 } 6161 case NEON::BI__builtin_neon_vsli_n_v: 6162 case NEON::BI__builtin_neon_vsliq_n_v: { 6163 Int = Intrinsic::aarch64_neon_vsli; 6164 llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty); 6165 return EmitNeonCall(Intrin, Ops, "vsli_n"); 6166 } 6167 case NEON::BI__builtin_neon_vsra_n_v: 6168 case NEON::BI__builtin_neon_vsraq_n_v: 6169 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6170 Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n"); 6171 return Builder.CreateAdd(Ops[0], Ops[1]); 6172 case NEON::BI__builtin_neon_vrsra_n_v: 6173 case NEON::BI__builtin_neon_vrsraq_n_v: { 6174 Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl; 6175 SmallVector<llvm::Value*,2> TmpOps; 6176 TmpOps.push_back(Ops[1]); 6177 TmpOps.push_back(Ops[2]); 6178 Function* F = CGM.getIntrinsic(Int, Ty); 6179 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true); 6180 Ops[0] = Builder.CreateBitCast(Ops[0], VTy); 6181 return Builder.CreateAdd(Ops[0], tmp); 6182 } 6183 // FIXME: Sharing loads & stores with 32-bit is complicated by the absence 6184 // of an Align parameter here. 6185 case NEON::BI__builtin_neon_vld1_x2_v: 6186 case NEON::BI__builtin_neon_vld1q_x2_v: 6187 case NEON::BI__builtin_neon_vld1_x3_v: 6188 case NEON::BI__builtin_neon_vld1q_x3_v: 6189 case NEON::BI__builtin_neon_vld1_x4_v: 6190 case NEON::BI__builtin_neon_vld1q_x4_v: { 6191 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6192 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6193 llvm::Type *Tys[2] = { VTy, PTy }; 6194 unsigned Int; 6195 switch (BuiltinID) { 6196 case NEON::BI__builtin_neon_vld1_x2_v: 6197 case NEON::BI__builtin_neon_vld1q_x2_v: 6198 Int = Intrinsic::aarch64_neon_ld1x2; 6199 break; 6200 case NEON::BI__builtin_neon_vld1_x3_v: 6201 case NEON::BI__builtin_neon_vld1q_x3_v: 6202 Int = Intrinsic::aarch64_neon_ld1x3; 6203 break; 6204 case NEON::BI__builtin_neon_vld1_x4_v: 6205 case NEON::BI__builtin_neon_vld1q_x4_v: 6206 Int = Intrinsic::aarch64_neon_ld1x4; 6207 break; 6208 } 6209 Function *F = CGM.getIntrinsic(Int, Tys); 6210 Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN"); 6211 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6212 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6213 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6214 } 6215 case NEON::BI__builtin_neon_vst1_x2_v: 6216 case NEON::BI__builtin_neon_vst1q_x2_v: 6217 case NEON::BI__builtin_neon_vst1_x3_v: 6218 case NEON::BI__builtin_neon_vst1q_x3_v: 6219 case NEON::BI__builtin_neon_vst1_x4_v: 6220 case NEON::BI__builtin_neon_vst1q_x4_v: { 6221 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType()); 6222 llvm::Type *Tys[2] = { VTy, PTy }; 6223 unsigned Int; 6224 switch (BuiltinID) { 6225 case NEON::BI__builtin_neon_vst1_x2_v: 6226 case NEON::BI__builtin_neon_vst1q_x2_v: 6227 Int = Intrinsic::aarch64_neon_st1x2; 6228 break; 6229 case NEON::BI__builtin_neon_vst1_x3_v: 6230 case NEON::BI__builtin_neon_vst1q_x3_v: 6231 Int = Intrinsic::aarch64_neon_st1x3; 6232 break; 6233 case NEON::BI__builtin_neon_vst1_x4_v: 6234 case NEON::BI__builtin_neon_vst1q_x4_v: 6235 Int = Intrinsic::aarch64_neon_st1x4; 6236 break; 6237 } 6238 std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end()); 6239 
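    // The builtin takes the address as its first argument, but the
    // aarch64.neon.st1x{2,3,4} intrinsics expect the data vectors first and
    // the pointer last, hence the rotate above.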
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, ""); 6240 } 6241 case NEON::BI__builtin_neon_vld1_v: 6242 case NEON::BI__builtin_neon_vld1q_v: 6243 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6244 return Builder.CreateDefaultAlignedLoad(Ops[0]); 6245 case NEON::BI__builtin_neon_vst1_v: 6246 case NEON::BI__builtin_neon_vst1q_v: 6247 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy)); 6248 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 6249 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6250 case NEON::BI__builtin_neon_vld1_lane_v: 6251 case NEON::BI__builtin_neon_vld1q_lane_v: 6252 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6253 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6254 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6255 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6256 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane"); 6257 case NEON::BI__builtin_neon_vld1_dup_v: 6258 case NEON::BI__builtin_neon_vld1q_dup_v: { 6259 Value *V = UndefValue::get(Ty); 6260 Ty = llvm::PointerType::getUnqual(VTy->getElementType()); 6261 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6262 Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]); 6263 llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); 6264 Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI); 6265 return EmitNeonSplat(Ops[0], CI); 6266 } 6267 case NEON::BI__builtin_neon_vst1_lane_v: 6268 case NEON::BI__builtin_neon_vst1q_lane_v: 6269 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6270 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); 6271 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6272 return Builder.CreateDefaultAlignedStore(Ops[1], 6273 Builder.CreateBitCast(Ops[0], Ty)); 6274 case NEON::BI__builtin_neon_vld2_v: 6275 case NEON::BI__builtin_neon_vld2q_v: { 6276 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6277 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6278 llvm::Type *Tys[2] = { VTy, PTy }; 6279 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys); 6280 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6281 Ops[0] = Builder.CreateBitCast(Ops[0], 6282 llvm::PointerType::getUnqual(Ops[1]->getType())); 6283 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6284 } 6285 case NEON::BI__builtin_neon_vld3_v: 6286 case NEON::BI__builtin_neon_vld3q_v: { 6287 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6288 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6289 llvm::Type *Tys[2] = { VTy, PTy }; 6290 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys); 6291 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6292 Ops[0] = Builder.CreateBitCast(Ops[0], 6293 llvm::PointerType::getUnqual(Ops[1]->getType())); 6294 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6295 } 6296 case NEON::BI__builtin_neon_vld4_v: 6297 case NEON::BI__builtin_neon_vld4q_v: { 6298 llvm::Type *PTy = llvm::PointerType::getUnqual(VTy); 6299 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6300 llvm::Type *Tys[2] = { VTy, PTy }; 6301 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys); 6302 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6303 Ops[0] = Builder.CreateBitCast(Ops[0], 6304 llvm::PointerType::getUnqual(Ops[1]->getType())); 6305 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6306 } 6307 case NEON::BI__builtin_neon_vld2_dup_v: 6308 case NEON::BI__builtin_neon_vld2q_dup_v: { 6309 llvm::Type *PTy = 6310 llvm::PointerType::getUnqual(VTy->getElementType()); 6311 Ops[1] = 
Builder.CreateBitCast(Ops[1], PTy); 6312 llvm::Type *Tys[2] = { VTy, PTy }; 6313 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys); 6314 Ops[1] = Builder.CreateCall(F, Ops[1], "vld2"); 6315 Ops[0] = Builder.CreateBitCast(Ops[0], 6316 llvm::PointerType::getUnqual(Ops[1]->getType())); 6317 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6318 } 6319 case NEON::BI__builtin_neon_vld3_dup_v: 6320 case NEON::BI__builtin_neon_vld3q_dup_v: { 6321 llvm::Type *PTy = 6322 llvm::PointerType::getUnqual(VTy->getElementType()); 6323 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6324 llvm::Type *Tys[2] = { VTy, PTy }; 6325 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys); 6326 Ops[1] = Builder.CreateCall(F, Ops[1], "vld3"); 6327 Ops[0] = Builder.CreateBitCast(Ops[0], 6328 llvm::PointerType::getUnqual(Ops[1]->getType())); 6329 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6330 } 6331 case NEON::BI__builtin_neon_vld4_dup_v: 6332 case NEON::BI__builtin_neon_vld4q_dup_v: { 6333 llvm::Type *PTy = 6334 llvm::PointerType::getUnqual(VTy->getElementType()); 6335 Ops[1] = Builder.CreateBitCast(Ops[1], PTy); 6336 llvm::Type *Tys[2] = { VTy, PTy }; 6337 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys); 6338 Ops[1] = Builder.CreateCall(F, Ops[1], "vld4"); 6339 Ops[0] = Builder.CreateBitCast(Ops[0], 6340 llvm::PointerType::getUnqual(Ops[1]->getType())); 6341 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6342 } 6343 case NEON::BI__builtin_neon_vld2_lane_v: 6344 case NEON::BI__builtin_neon_vld2q_lane_v: { 6345 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6346 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys); 6347 Ops.push_back(Ops[1]); 6348 Ops.erase(Ops.begin()+1); 6349 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6350 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6351 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6352 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane"); 6353 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6354 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6355 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6356 } 6357 case NEON::BI__builtin_neon_vld3_lane_v: 6358 case NEON::BI__builtin_neon_vld3q_lane_v: { 6359 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6360 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys); 6361 Ops.push_back(Ops[1]); 6362 Ops.erase(Ops.begin()+1); 6363 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6364 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6365 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6366 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6367 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane"); 6368 Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); 6369 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6370 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6371 } 6372 case NEON::BI__builtin_neon_vld4_lane_v: 6373 case NEON::BI__builtin_neon_vld4q_lane_v: { 6374 llvm::Type *Tys[2] = { VTy, Ops[1]->getType() }; 6375 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys); 6376 Ops.push_back(Ops[1]); 6377 Ops.erase(Ops.begin()+1); 6378 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6379 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6380 Ops[3] = Builder.CreateBitCast(Ops[3], Ty); 6381 Ops[4] = Builder.CreateBitCast(Ops[4], Ty); 6382 Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty); 6383 Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane"); 6384 Ty = 
llvm::PointerType::getUnqual(Ops[1]->getType()); 6385 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 6386 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 6387 } 6388 case NEON::BI__builtin_neon_vst2_v: 6389 case NEON::BI__builtin_neon_vst2q_v: { 6390 Ops.push_back(Ops[0]); 6391 Ops.erase(Ops.begin()); 6392 llvm::Type *Tys[2] = { VTy, Ops[2]->getType() }; 6393 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys), 6394 Ops, ""); 6395 } 6396 case NEON::BI__builtin_neon_vst2_lane_v: 6397 case NEON::BI__builtin_neon_vst2q_lane_v: { 6398 Ops.push_back(Ops[0]); 6399 Ops.erase(Ops.begin()); 6400 Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty); 6401 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6402 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys), 6403 Ops, ""); 6404 } 6405 case NEON::BI__builtin_neon_vst3_v: 6406 case NEON::BI__builtin_neon_vst3q_v: { 6407 Ops.push_back(Ops[0]); 6408 Ops.erase(Ops.begin()); 6409 llvm::Type *Tys[2] = { VTy, Ops[3]->getType() }; 6410 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys), 6411 Ops, ""); 6412 } 6413 case NEON::BI__builtin_neon_vst3_lane_v: 6414 case NEON::BI__builtin_neon_vst3q_lane_v: { 6415 Ops.push_back(Ops[0]); 6416 Ops.erase(Ops.begin()); 6417 Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty); 6418 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6419 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys), 6420 Ops, ""); 6421 } 6422 case NEON::BI__builtin_neon_vst4_v: 6423 case NEON::BI__builtin_neon_vst4q_v: { 6424 Ops.push_back(Ops[0]); 6425 Ops.erase(Ops.begin()); 6426 llvm::Type *Tys[2] = { VTy, Ops[4]->getType() }; 6427 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys), 6428 Ops, ""); 6429 } 6430 case NEON::BI__builtin_neon_vst4_lane_v: 6431 case NEON::BI__builtin_neon_vst4q_lane_v: { 6432 Ops.push_back(Ops[0]); 6433 Ops.erase(Ops.begin()); 6434 Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty); 6435 llvm::Type *Tys[2] = { VTy, Ops[5]->getType() }; 6436 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys), 6437 Ops, ""); 6438 } 6439 case NEON::BI__builtin_neon_vtrn_v: 6440 case NEON::BI__builtin_neon_vtrnq_v: { 6441 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6442 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6443 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6444 Value *SV = nullptr; 6445 6446 for (unsigned vi = 0; vi != 2; ++vi) { 6447 SmallVector<uint32_t, 16> Indices; 6448 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6449 Indices.push_back(i+vi); 6450 Indices.push_back(i+e+vi); 6451 } 6452 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6453 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn"); 6454 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6455 } 6456 return SV; 6457 } 6458 case NEON::BI__builtin_neon_vuzp_v: 6459 case NEON::BI__builtin_neon_vuzpq_v: { 6460 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6461 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6462 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6463 Value *SV = nullptr; 6464 6465 for (unsigned vi = 0; vi != 2; ++vi) { 6466 SmallVector<uint32_t, 16> Indices; 6467 for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) 6468 Indices.push_back(2*i+vi); 6469 6470 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6471 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp"); 6472 SV = Builder.CreateDefaultAlignedStore(SV, 
Addr); 6473 } 6474 return SV; 6475 } 6476 case NEON::BI__builtin_neon_vzip_v: 6477 case NEON::BI__builtin_neon_vzipq_v: { 6478 Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty)); 6479 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 6480 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 6481 Value *SV = nullptr; 6482 6483 for (unsigned vi = 0; vi != 2; ++vi) { 6484 SmallVector<uint32_t, 16> Indices; 6485 for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) { 6486 Indices.push_back((i + vi*e) >> 1); 6487 Indices.push_back(((i + vi*e) >> 1)+e); 6488 } 6489 Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi); 6490 SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip"); 6491 SV = Builder.CreateDefaultAlignedStore(SV, Addr); 6492 } 6493 return SV; 6494 } 6495 case NEON::BI__builtin_neon_vqtbl1q_v: { 6496 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty), 6497 Ops, "vtbl1"); 6498 } 6499 case NEON::BI__builtin_neon_vqtbl2q_v: { 6500 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty), 6501 Ops, "vtbl2"); 6502 } 6503 case NEON::BI__builtin_neon_vqtbl3q_v: { 6504 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty), 6505 Ops, "vtbl3"); 6506 } 6507 case NEON::BI__builtin_neon_vqtbl4q_v: { 6508 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty), 6509 Ops, "vtbl4"); 6510 } 6511 case NEON::BI__builtin_neon_vqtbx1q_v: { 6512 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty), 6513 Ops, "vtbx1"); 6514 } 6515 case NEON::BI__builtin_neon_vqtbx2q_v: { 6516 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty), 6517 Ops, "vtbx2"); 6518 } 6519 case NEON::BI__builtin_neon_vqtbx3q_v: { 6520 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty), 6521 Ops, "vtbx3"); 6522 } 6523 case NEON::BI__builtin_neon_vqtbx4q_v: { 6524 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty), 6525 Ops, "vtbx4"); 6526 } 6527 case NEON::BI__builtin_neon_vsqadd_v: 6528 case NEON::BI__builtin_neon_vsqaddq_v: { 6529 Int = Intrinsic::aarch64_neon_usqadd; 6530 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd"); 6531 } 6532 case NEON::BI__builtin_neon_vuqadd_v: 6533 case NEON::BI__builtin_neon_vuqaddq_v: { 6534 Int = Intrinsic::aarch64_neon_suqadd; 6535 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); 6536 } 6537 } 6538 } 6539 6540 llvm::Value *CodeGenFunction:: 6541 BuildVector(ArrayRef<llvm::Value*> Ops) { 6542 assert((Ops.size() & (Ops.size() - 1)) == 0 && 6543 "Not a power-of-two sized vector!"); 6544 bool AllConstants = true; 6545 for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i) 6546 AllConstants &= isa<Constant>(Ops[i]); 6547 6548 // If this is a constant vector, create a ConstantVector. 6549 if (AllConstants) { 6550 SmallVector<llvm::Constant*, 16> CstOps; 6551 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6552 CstOps.push_back(cast<Constant>(Ops[i])); 6553 return llvm::ConstantVector::get(CstOps); 6554 } 6555 6556 // Otherwise, insertelement the values to build the vector. 6557 Value *Result = 6558 llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size())); 6559 6560 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 6561 Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i)); 6562 6563 return Result; 6564 } 6565 6566 // Convert the mask from an integer type to a vector of i1. 
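// For example, an i8 mask paired with a 4-element vector is bitcast to
// <8 x i1> and then shuffled down to <4 x i1> so it can feed a masked
// load/store or a vector select.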
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
                              unsigned NumElts) {

  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
                         cast<IntegerType>(Mask->getType())->getBitWidth());
  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);

  // If we have fewer than 8 elements, then the starting mask was an i8 and
  // we need to extract down to the right number of elements.
  if (NumElts < 8) {
    uint32_t Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
                                              makeArrayRef(Indices, NumElts),
                                              "extract");
  }
  return MaskVec;
}

static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
                                 SmallVectorImpl<Value *> &Ops,
                                 unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
}

static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
                                SmallVectorImpl<Value *> &Ops, unsigned Align) {
  // Cast the pointer to the right type.
  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
                               llvm::PointerType::getUnqual(Ops[1]->getType()));

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Ops[2]))
    if (C->isAllOnesValue())
      return CGF.Builder.CreateAlignedLoad(Ops[0], Align);

  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
                                   Ops[1]->getType()->getVectorNumElements());

  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
}

static Value *EmitX86Select(CodeGenFunction &CGF,
                            Value *Mask, Value *Op0, Value *Op1) {

  // If the mask is all ones just return the first argument.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());

  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
                                   bool Signed, SmallVectorImpl<Value *> &Ops) {
  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
  Value *Cmp;

  if (CC == 3) {
    Cmp = Constant::getNullValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ;  break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE;  break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
  }

  const auto *C = dyn_cast<Constant>(Ops.back());
  if (!C || !C->isAllOnesValue())
    Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));

  if (NumElts < 8) {
    uint32_t Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = i % NumElts + NumElts;
    Cmp = CGF.Builder.CreateShuffleVector(
        Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
  }
  return CGF.Builder.CreateBitCast(Cmp,
                                   IntegerType::get(CGF.getLLVMContext(),
                                                    std::max(NumElts, 8U)));
}

Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (BuiltinID == X86::BI__builtin_ms_va_start ||
      BuiltinID == X86::BI__builtin_ms_va_end)
    return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
                          BuiltinID == X86::BI__builtin_ms_va_start);
  if (BuiltinID == X86::BI__builtin_ms_va_copy) {
    // Lower this manually. We can't reliably determine whether or not any
    // given va_copy() is for a Win64 va_list from the calling convention
    // alone, because it's legal to do this from a System V ABI function.
    // With opaque pointer types, we won't have enough information in LLVM
    // IR to determine this from the argument types, either. Best to do it
    // now, while we have enough information.
    Address DestAddr = EmitMSVAListRef(E->getArg(0));
    Address SrcAddr = EmitMSVAListRef(E->getArg(1));

    llvm::Type *BPP = Int8PtrPtrTy;

    DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
                       DestAddr.getAlignment());
    SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
                      SrcAddr.getAlignment());

    Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
    return Builder.CreateStore(ArgPtr, DestAddr);
  }

  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  // These exist so that the builtin that takes an immediate can be bounds
  // checked by clang to avoid passing bad immediates to the backend. Since
  // AVX has a larger immediate than SSE we would need separate builtins to
  // do the different bounds checking. Rather than create a clang-specific
  // SSE-only builtin, this implements eight separate builtins to match the
  // gcc implementation.
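  // The immediate follows the usual SSE comparison-predicate encoding
  // (roughly: 0 = EQ, 1 = LT, 2 = LE, 3 = UNORD, 4 = NEQ, 5 = NLT, 6 = NLE,
  // 7 = ORD); the helper below simply appends that immediate and calls the
  // corresponding intrinsic.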
6734 auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) { 6735 Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm)); 6736 llvm::Function *F = CGM.getIntrinsic(ID); 6737 return Builder.CreateCall(F, Ops); 6738 }; 6739 6740 // For the vector forms of FP comparisons, translate the builtins directly to 6741 // IR. 6742 // TODO: The builtins could be removed if the SSE header files used vector 6743 // extension comparisons directly (vector ordered/unordered may need 6744 // additional support via __builtin_isnan()). 6745 auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) { 6746 Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]); 6747 llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType()); 6748 llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy); 6749 Value *Sext = Builder.CreateSExt(Cmp, IntVecTy); 6750 return Builder.CreateBitCast(Sext, FPVecTy); 6751 }; 6752 6753 switch (BuiltinID) { 6754 default: return nullptr; 6755 case X86::BI__builtin_cpu_supports: { 6756 const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts(); 6757 StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString(); 6758 6759 // TODO: When/if this becomes more than x86 specific then use a TargetInfo 6760 // based mapping. 6761 // Processor features and mapping to processor feature value. 6762 enum X86Features { 6763 CMOV = 0, 6764 MMX, 6765 POPCNT, 6766 SSE, 6767 SSE2, 6768 SSE3, 6769 SSSE3, 6770 SSE4_1, 6771 SSE4_2, 6772 AVX, 6773 AVX2, 6774 SSE4_A, 6775 FMA4, 6776 XOP, 6777 FMA, 6778 AVX512F, 6779 BMI, 6780 BMI2, 6781 AES, 6782 PCLMUL, 6783 AVX512VL, 6784 AVX512BW, 6785 AVX512DQ, 6786 AVX512CD, 6787 AVX512ER, 6788 AVX512PF, 6789 AVX512VBMI, 6790 AVX512IFMA, 6791 MAX 6792 }; 6793 6794 X86Features Feature = StringSwitch<X86Features>(FeatureStr) 6795 .Case("cmov", X86Features::CMOV) 6796 .Case("mmx", X86Features::MMX) 6797 .Case("popcnt", X86Features::POPCNT) 6798 .Case("sse", X86Features::SSE) 6799 .Case("sse2", X86Features::SSE2) 6800 .Case("sse3", X86Features::SSE3) 6801 .Case("ssse3", X86Features::SSSE3) 6802 .Case("sse4.1", X86Features::SSE4_1) 6803 .Case("sse4.2", X86Features::SSE4_2) 6804 .Case("avx", X86Features::AVX) 6805 .Case("avx2", X86Features::AVX2) 6806 .Case("sse4a", X86Features::SSE4_A) 6807 .Case("fma4", X86Features::FMA4) 6808 .Case("xop", X86Features::XOP) 6809 .Case("fma", X86Features::FMA) 6810 .Case("avx512f", X86Features::AVX512F) 6811 .Case("bmi", X86Features::BMI) 6812 .Case("bmi2", X86Features::BMI2) 6813 .Case("aes", X86Features::AES) 6814 .Case("pclmul", X86Features::PCLMUL) 6815 .Case("avx512vl", X86Features::AVX512VL) 6816 .Case("avx512bw", X86Features::AVX512BW) 6817 .Case("avx512dq", X86Features::AVX512DQ) 6818 .Case("avx512cd", X86Features::AVX512CD) 6819 .Case("avx512er", X86Features::AVX512ER) 6820 .Case("avx512pf", X86Features::AVX512PF) 6821 .Case("avx512vbmi", X86Features::AVX512VBMI) 6822 .Case("avx512ifma", X86Features::AVX512IFMA) 6823 .Default(X86Features::MAX); 6824 assert(Feature != X86Features::MAX && "Invalid feature!"); 6825 6826 // Matching the struct layout from the compiler-rt/libgcc structure that is 6827 // filled in: 6828 // unsigned int __cpu_vendor; 6829 // unsigned int __cpu_type; 6830 // unsigned int __cpu_subtype; 6831 // unsigned int __cpu_features[1]; 6832 llvm::Type *STy = llvm::StructType::get( 6833 Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr); 6834 6835 // Grab the global __cpu_model. 
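    // For reference (sketch, not from the original source), the IR emitted
    // below is morally equivalent to the C expression
    //   (__cpu_model.__cpu_features[0] & (1u << Feature)) != 0
    // built explicitly from a GEP into __cpu_model, an aligned i32 load, an
    // 'and' with the feature bit, and an 'icmp ne 0'.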
6836 llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model"); 6837 6838 // Grab the first (0th) element from the field __cpu_features off of the 6839 // global in the struct STy. 6840 Value *Idxs[] = { 6841 ConstantInt::get(Int32Ty, 0), 6842 ConstantInt::get(Int32Ty, 3), 6843 ConstantInt::get(Int32Ty, 0) 6844 }; 6845 Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs); 6846 Value *Features = Builder.CreateAlignedLoad(CpuFeatures, 6847 CharUnits::fromQuantity(4)); 6848 6849 // Check the value of the bit corresponding to the feature requested. 6850 Value *Bitset = Builder.CreateAnd( 6851 Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature)); 6852 return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0)); 6853 } 6854 case X86::BI_mm_prefetch: { 6855 Value *Address = Ops[0]; 6856 Value *RW = ConstantInt::get(Int32Ty, 0); 6857 Value *Locality = Ops[1]; 6858 Value *Data = ConstantInt::get(Int32Ty, 1); 6859 Value *F = CGM.getIntrinsic(Intrinsic::prefetch); 6860 return Builder.CreateCall(F, {Address, RW, Locality, Data}); 6861 } 6862 case X86::BI__builtin_ia32_undef128: 6863 case X86::BI__builtin_ia32_undef256: 6864 case X86::BI__builtin_ia32_undef512: 6865 return UndefValue::get(ConvertType(E->getType())); 6866 case X86::BI__builtin_ia32_vec_init_v8qi: 6867 case X86::BI__builtin_ia32_vec_init_v4hi: 6868 case X86::BI__builtin_ia32_vec_init_v2si: 6869 return Builder.CreateBitCast(BuildVector(Ops), 6870 llvm::Type::getX86_MMXTy(getLLVMContext())); 6871 case X86::BI__builtin_ia32_vec_ext_v2si: 6872 return Builder.CreateExtractElement(Ops[0], 6873 llvm::ConstantInt::get(Ops[1]->getType(), 0)); 6874 case X86::BI__builtin_ia32_ldmxcsr: { 6875 Address Tmp = CreateMemTemp(E->getArg(0)->getType()); 6876 Builder.CreateStore(Ops[0], Tmp); 6877 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr), 6878 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 6879 } 6880 case X86::BI__builtin_ia32_stmxcsr: { 6881 Address Tmp = CreateMemTemp(E->getType()); 6882 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr), 6883 Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy)); 6884 return Builder.CreateLoad(Tmp, "stmxcsr"); 6885 } 6886 case X86::BI__builtin_ia32_xsave: 6887 case X86::BI__builtin_ia32_xsave64: 6888 case X86::BI__builtin_ia32_xrstor: 6889 case X86::BI__builtin_ia32_xrstor64: 6890 case X86::BI__builtin_ia32_xsaveopt: 6891 case X86::BI__builtin_ia32_xsaveopt64: 6892 case X86::BI__builtin_ia32_xrstors: 6893 case X86::BI__builtin_ia32_xrstors64: 6894 case X86::BI__builtin_ia32_xsavec: 6895 case X86::BI__builtin_ia32_xsavec64: 6896 case X86::BI__builtin_ia32_xsaves: 6897 case X86::BI__builtin_ia32_xsaves64: { 6898 Intrinsic::ID ID; 6899 #define INTRINSIC_X86_XSAVE_ID(NAME) \ 6900 case X86::BI__builtin_ia32_##NAME: \ 6901 ID = Intrinsic::x86_##NAME; \ 6902 break 6903 switch (BuiltinID) { 6904 default: llvm_unreachable("Unsupported intrinsic!"); 6905 INTRINSIC_X86_XSAVE_ID(xsave); 6906 INTRINSIC_X86_XSAVE_ID(xsave64); 6907 INTRINSIC_X86_XSAVE_ID(xrstor); 6908 INTRINSIC_X86_XSAVE_ID(xrstor64); 6909 INTRINSIC_X86_XSAVE_ID(xsaveopt); 6910 INTRINSIC_X86_XSAVE_ID(xsaveopt64); 6911 INTRINSIC_X86_XSAVE_ID(xrstors); 6912 INTRINSIC_X86_XSAVE_ID(xrstors64); 6913 INTRINSIC_X86_XSAVE_ID(xsavec); 6914 INTRINSIC_X86_XSAVE_ID(xsavec64); 6915 INTRINSIC_X86_XSAVE_ID(xsaves); 6916 INTRINSIC_X86_XSAVE_ID(xsaves64); 6917 } 6918 #undef INTRINSIC_X86_XSAVE_ID 6919 Value *Mhi = Builder.CreateTrunc( 6920 Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), 
Int32Ty); 6921 Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty); 6922 Ops[1] = Mhi; 6923 Ops.push_back(Mlo); 6924 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 6925 } 6926 case X86::BI__builtin_ia32_storedqudi128_mask: 6927 case X86::BI__builtin_ia32_storedqusi128_mask: 6928 case X86::BI__builtin_ia32_storedquhi128_mask: 6929 case X86::BI__builtin_ia32_storedquqi128_mask: 6930 case X86::BI__builtin_ia32_storeupd128_mask: 6931 case X86::BI__builtin_ia32_storeups128_mask: 6932 case X86::BI__builtin_ia32_storedqudi256_mask: 6933 case X86::BI__builtin_ia32_storedqusi256_mask: 6934 case X86::BI__builtin_ia32_storedquhi256_mask: 6935 case X86::BI__builtin_ia32_storedquqi256_mask: 6936 case X86::BI__builtin_ia32_storeupd256_mask: 6937 case X86::BI__builtin_ia32_storeups256_mask: 6938 case X86::BI__builtin_ia32_storedqudi512_mask: 6939 case X86::BI__builtin_ia32_storedqusi512_mask: 6940 case X86::BI__builtin_ia32_storedquhi512_mask: 6941 case X86::BI__builtin_ia32_storedquqi512_mask: 6942 case X86::BI__builtin_ia32_storeupd512_mask: 6943 case X86::BI__builtin_ia32_storeups512_mask: 6944 return EmitX86MaskedStore(*this, Ops, 1); 6945 6946 case X86::BI__builtin_ia32_movdqa32store128_mask: 6947 case X86::BI__builtin_ia32_movdqa64store128_mask: 6948 case X86::BI__builtin_ia32_storeaps128_mask: 6949 case X86::BI__builtin_ia32_storeapd128_mask: 6950 case X86::BI__builtin_ia32_movdqa32store256_mask: 6951 case X86::BI__builtin_ia32_movdqa64store256_mask: 6952 case X86::BI__builtin_ia32_storeaps256_mask: 6953 case X86::BI__builtin_ia32_storeapd256_mask: 6954 case X86::BI__builtin_ia32_movdqa32store512_mask: 6955 case X86::BI__builtin_ia32_movdqa64store512_mask: 6956 case X86::BI__builtin_ia32_storeaps512_mask: 6957 case X86::BI__builtin_ia32_storeapd512_mask: { 6958 unsigned Align = 6959 getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 6960 return EmitX86MaskedStore(*this, Ops, Align); 6961 } 6962 case X86::BI__builtin_ia32_loadups128_mask: 6963 case X86::BI__builtin_ia32_loadups256_mask: 6964 case X86::BI__builtin_ia32_loadups512_mask: 6965 case X86::BI__builtin_ia32_loadupd128_mask: 6966 case X86::BI__builtin_ia32_loadupd256_mask: 6967 case X86::BI__builtin_ia32_loadupd512_mask: 6968 case X86::BI__builtin_ia32_loaddquqi128_mask: 6969 case X86::BI__builtin_ia32_loaddquqi256_mask: 6970 case X86::BI__builtin_ia32_loaddquqi512_mask: 6971 case X86::BI__builtin_ia32_loaddquhi128_mask: 6972 case X86::BI__builtin_ia32_loaddquhi256_mask: 6973 case X86::BI__builtin_ia32_loaddquhi512_mask: 6974 case X86::BI__builtin_ia32_loaddqusi128_mask: 6975 case X86::BI__builtin_ia32_loaddqusi256_mask: 6976 case X86::BI__builtin_ia32_loaddqusi512_mask: 6977 case X86::BI__builtin_ia32_loaddqudi128_mask: 6978 case X86::BI__builtin_ia32_loaddqudi256_mask: 6979 case X86::BI__builtin_ia32_loaddqudi512_mask: 6980 return EmitX86MaskedLoad(*this, Ops, 1); 6981 6982 case X86::BI__builtin_ia32_loadaps128_mask: 6983 case X86::BI__builtin_ia32_loadaps256_mask: 6984 case X86::BI__builtin_ia32_loadaps512_mask: 6985 case X86::BI__builtin_ia32_loadapd128_mask: 6986 case X86::BI__builtin_ia32_loadapd256_mask: 6987 case X86::BI__builtin_ia32_loadapd512_mask: 6988 case X86::BI__builtin_ia32_movdqa32load128_mask: 6989 case X86::BI__builtin_ia32_movdqa32load256_mask: 6990 case X86::BI__builtin_ia32_movdqa32load512_mask: 6991 case X86::BI__builtin_ia32_movdqa64load128_mask: 6992 case X86::BI__builtin_ia32_movdqa64load256_mask: 6993 case X86::BI__builtin_ia32_movdqa64load512_mask: { 6994 unsigned Align = 6995 
getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity(); 6996 return EmitX86MaskedLoad(*this, Ops, Align); 6997 } 6998 case X86::BI__builtin_ia32_storehps: 6999 case X86::BI__builtin_ia32_storelps: { 7000 llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty); 7001 llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2); 7002 7003 // cast val v2i64 7004 Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast"); 7005 7006 // extract (0, 1) 7007 unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1; 7008 llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); 7009 Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); 7010 7011 // cast pointer to i64 & store 7012 Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); 7013 return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]); 7014 } 7015 case X86::BI__builtin_ia32_palignr128: 7016 case X86::BI__builtin_ia32_palignr256: 7017 case X86::BI__builtin_ia32_palignr128_mask: 7018 case X86::BI__builtin_ia32_palignr256_mask: 7019 case X86::BI__builtin_ia32_palignr512_mask: { 7020 unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); 7021 7022 unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); 7023 assert(NumElts % 16 == 0); 7024 7025 // If palignr is shifting the pair of vectors more than the size of two 7026 // lanes, emit zero. 7027 if (ShiftVal >= 32) 7028 return llvm::Constant::getNullValue(ConvertType(E->getType())); 7029 7030 // If palignr is shifting the pair of input vectors more than one lane, 7031 // but less than two lanes, convert to shifting in zeroes. 7032 if (ShiftVal > 16) { 7033 ShiftVal -= 16; 7034 Ops[1] = Ops[0]; 7035 Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType()); 7036 } 7037 7038 uint32_t Indices[64]; 7039 // 256-bit palignr operates on 128-bit lanes so we need to handle that 7040 for (unsigned l = 0; l != NumElts; l += 16) { 7041 for (unsigned i = 0; i != 16; ++i) { 7042 unsigned Idx = ShiftVal + i; 7043 if (Idx >= 16) 7044 Idx += NumElts - 16; // End of lane, switch operand. 7045 Indices[l + i] = Idx + l; 7046 } 7047 } 7048 7049 Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0], 7050 makeArrayRef(Indices, NumElts), 7051 "palignr"); 7052 7053 // If this isn't a masked builtin, just return the align operation. 7054 if (Ops.size() == 3) 7055 return Align; 7056 7057 return EmitX86Select(*this, Ops[4], Align, Ops[3]); 7058 } 7059 7060 case X86::BI__builtin_ia32_movnti: 7061 case X86::BI__builtin_ia32_movnti64: { 7062 llvm::MDNode *Node = llvm::MDNode::get( 7063 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7064 7065 // Convert the type of the pointer to a pointer to the stored type. 7066 Value *BC = Builder.CreateBitCast(Ops[0], 7067 llvm::PointerType::getUnqual(Ops[1]->getType()), 7068 "cast"); 7069 StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC); 7070 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7071 7072 // No alignment for scalar intrinsic store. 7073 SI->setAlignment(1); 7074 return SI; 7075 } 7076 case X86::BI__builtin_ia32_movntsd: 7077 case X86::BI__builtin_ia32_movntss: { 7078 llvm::MDNode *Node = llvm::MDNode::get( 7079 getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1))); 7080 7081 // Extract the 0'th element of the source vector. 7082 Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract"); 7083 7084 // Convert the type of the pointer to a pointer to the stored type. 
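    // Illustrative result (sketch): for _mm_stream_sd the code below emits
    // roughly
    //   %e = extractelement <2 x double> %val, i64 0
    //   store double %e, double* %p, align 1, !nontemporal !n
    // i.e. an unaligned scalar store tagged with nontemporal metadata.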
7085 Value *BC = Builder.CreateBitCast(Ops[0], 7086 llvm::PointerType::getUnqual(Scl->getType()), 7087 "cast"); 7088 7089 // Unaligned nontemporal store of the scalar value. 7090 StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC); 7091 SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); 7092 SI->setAlignment(1); 7093 return SI; 7094 } 7095 7096 case X86::BI__builtin_ia32_selectb_128: 7097 case X86::BI__builtin_ia32_selectb_256: 7098 case X86::BI__builtin_ia32_selectb_512: 7099 case X86::BI__builtin_ia32_selectw_128: 7100 case X86::BI__builtin_ia32_selectw_256: 7101 case X86::BI__builtin_ia32_selectw_512: 7102 case X86::BI__builtin_ia32_selectd_128: 7103 case X86::BI__builtin_ia32_selectd_256: 7104 case X86::BI__builtin_ia32_selectd_512: 7105 case X86::BI__builtin_ia32_selectq_128: 7106 case X86::BI__builtin_ia32_selectq_256: 7107 case X86::BI__builtin_ia32_selectq_512: 7108 case X86::BI__builtin_ia32_selectps_128: 7109 case X86::BI__builtin_ia32_selectps_256: 7110 case X86::BI__builtin_ia32_selectps_512: 7111 case X86::BI__builtin_ia32_selectpd_128: 7112 case X86::BI__builtin_ia32_selectpd_256: 7113 case X86::BI__builtin_ia32_selectpd_512: 7114 return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]); 7115 case X86::BI__builtin_ia32_pcmpeqb128_mask: 7116 case X86::BI__builtin_ia32_pcmpeqb256_mask: 7117 case X86::BI__builtin_ia32_pcmpeqb512_mask: 7118 case X86::BI__builtin_ia32_pcmpeqw128_mask: 7119 case X86::BI__builtin_ia32_pcmpeqw256_mask: 7120 case X86::BI__builtin_ia32_pcmpeqw512_mask: 7121 case X86::BI__builtin_ia32_pcmpeqd128_mask: 7122 case X86::BI__builtin_ia32_pcmpeqd256_mask: 7123 case X86::BI__builtin_ia32_pcmpeqd512_mask: 7124 case X86::BI__builtin_ia32_pcmpeqq128_mask: 7125 case X86::BI__builtin_ia32_pcmpeqq256_mask: 7126 case X86::BI__builtin_ia32_pcmpeqq512_mask: 7127 return EmitX86MaskedCompare(*this, 0, false, Ops); 7128 case X86::BI__builtin_ia32_pcmpgtb128_mask: 7129 case X86::BI__builtin_ia32_pcmpgtb256_mask: 7130 case X86::BI__builtin_ia32_pcmpgtb512_mask: 7131 case X86::BI__builtin_ia32_pcmpgtw128_mask: 7132 case X86::BI__builtin_ia32_pcmpgtw256_mask: 7133 case X86::BI__builtin_ia32_pcmpgtw512_mask: 7134 case X86::BI__builtin_ia32_pcmpgtd128_mask: 7135 case X86::BI__builtin_ia32_pcmpgtd256_mask: 7136 case X86::BI__builtin_ia32_pcmpgtd512_mask: 7137 case X86::BI__builtin_ia32_pcmpgtq128_mask: 7138 case X86::BI__builtin_ia32_pcmpgtq256_mask: 7139 case X86::BI__builtin_ia32_pcmpgtq512_mask: 7140 return EmitX86MaskedCompare(*this, 6, true, Ops); 7141 case X86::BI__builtin_ia32_cmpb128_mask: 7142 case X86::BI__builtin_ia32_cmpb256_mask: 7143 case X86::BI__builtin_ia32_cmpb512_mask: 7144 case X86::BI__builtin_ia32_cmpw128_mask: 7145 case X86::BI__builtin_ia32_cmpw256_mask: 7146 case X86::BI__builtin_ia32_cmpw512_mask: 7147 case X86::BI__builtin_ia32_cmpd128_mask: 7148 case X86::BI__builtin_ia32_cmpd256_mask: 7149 case X86::BI__builtin_ia32_cmpd512_mask: 7150 case X86::BI__builtin_ia32_cmpq128_mask: 7151 case X86::BI__builtin_ia32_cmpq256_mask: 7152 case X86::BI__builtin_ia32_cmpq512_mask: { 7153 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7154 return EmitX86MaskedCompare(*this, CC, true, Ops); 7155 } 7156 case X86::BI__builtin_ia32_ucmpb128_mask: 7157 case X86::BI__builtin_ia32_ucmpb256_mask: 7158 case X86::BI__builtin_ia32_ucmpb512_mask: 7159 case X86::BI__builtin_ia32_ucmpw128_mask: 7160 case X86::BI__builtin_ia32_ucmpw256_mask: 7161 case X86::BI__builtin_ia32_ucmpw512_mask: 7162 case X86::BI__builtin_ia32_ucmpd128_mask: 7163 
case X86::BI__builtin_ia32_ucmpd256_mask: 7164 case X86::BI__builtin_ia32_ucmpd512_mask: 7165 case X86::BI__builtin_ia32_ucmpq128_mask: 7166 case X86::BI__builtin_ia32_ucmpq256_mask: 7167 case X86::BI__builtin_ia32_ucmpq512_mask: { 7168 unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7; 7169 return EmitX86MaskedCompare(*this, CC, false, Ops); 7170 } 7171 7172 case X86::BI__builtin_ia32_vplzcntd_128_mask: 7173 case X86::BI__builtin_ia32_vplzcntd_256_mask: 7174 case X86::BI__builtin_ia32_vplzcntd_512_mask: 7175 case X86::BI__builtin_ia32_vplzcntq_128_mask: 7176 case X86::BI__builtin_ia32_vplzcntq_256_mask: 7177 case X86::BI__builtin_ia32_vplzcntq_512_mask: { 7178 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType()); 7179 return EmitX86Select(*this, Ops[2], 7180 Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}), 7181 Ops[1]); 7182 } 7183 7184 // TODO: Handle 64/512-bit vector widths of min/max. 7185 case X86::BI__builtin_ia32_pmaxsb128: 7186 case X86::BI__builtin_ia32_pmaxsw128: 7187 case X86::BI__builtin_ia32_pmaxsd128: 7188 case X86::BI__builtin_ia32_pmaxsb256: 7189 case X86::BI__builtin_ia32_pmaxsw256: 7190 case X86::BI__builtin_ia32_pmaxsd256: { 7191 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]); 7192 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7193 } 7194 case X86::BI__builtin_ia32_pmaxub128: 7195 case X86::BI__builtin_ia32_pmaxuw128: 7196 case X86::BI__builtin_ia32_pmaxud128: 7197 case X86::BI__builtin_ia32_pmaxub256: 7198 case X86::BI__builtin_ia32_pmaxuw256: 7199 case X86::BI__builtin_ia32_pmaxud256: { 7200 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]); 7201 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7202 } 7203 case X86::BI__builtin_ia32_pminsb128: 7204 case X86::BI__builtin_ia32_pminsw128: 7205 case X86::BI__builtin_ia32_pminsd128: 7206 case X86::BI__builtin_ia32_pminsb256: 7207 case X86::BI__builtin_ia32_pminsw256: 7208 case X86::BI__builtin_ia32_pminsd256: { 7209 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]); 7210 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7211 } 7212 case X86::BI__builtin_ia32_pminub128: 7213 case X86::BI__builtin_ia32_pminuw128: 7214 case X86::BI__builtin_ia32_pminud128: 7215 case X86::BI__builtin_ia32_pminub256: 7216 case X86::BI__builtin_ia32_pminuw256: 7217 case X86::BI__builtin_ia32_pminud256: { 7218 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]); 7219 return Builder.CreateSelect(Cmp, Ops[0], Ops[1]); 7220 } 7221 7222 // 3DNow! 
7223   case X86::BI__builtin_ia32_pswapdsf:
7224   case X86::BI__builtin_ia32_pswapdsi: {
7225     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7226     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7227     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7228     return Builder.CreateCall(F, Ops, "pswapd");
7229   }
7230   case X86::BI__builtin_ia32_rdrand16_step:
7231   case X86::BI__builtin_ia32_rdrand32_step:
7232   case X86::BI__builtin_ia32_rdrand64_step:
7233   case X86::BI__builtin_ia32_rdseed16_step:
7234   case X86::BI__builtin_ia32_rdseed32_step:
7235   case X86::BI__builtin_ia32_rdseed64_step: {
7236     Intrinsic::ID ID;
7237     switch (BuiltinID) {
7238     default: llvm_unreachable("Unsupported intrinsic!");
7239     case X86::BI__builtin_ia32_rdrand16_step:
7240       ID = Intrinsic::x86_rdrand_16;
7241       break;
7242     case X86::BI__builtin_ia32_rdrand32_step:
7243       ID = Intrinsic::x86_rdrand_32;
7244       break;
7245     case X86::BI__builtin_ia32_rdrand64_step:
7246       ID = Intrinsic::x86_rdrand_64;
7247       break;
7248     case X86::BI__builtin_ia32_rdseed16_step:
7249       ID = Intrinsic::x86_rdseed_16;
7250       break;
7251     case X86::BI__builtin_ia32_rdseed32_step:
7252       ID = Intrinsic::x86_rdseed_32;
7253       break;
7254     case X86::BI__builtin_ia32_rdseed64_step:
7255       ID = Intrinsic::x86_rdseed_64;
7256       break;
7257     }
7258
7259     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7260     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7261                                       Ops[0]);
7262     return Builder.CreateExtractValue(Call, 1);
7263   }
7264
7265   // SSE packed comparison intrinsics
7266   case X86::BI__builtin_ia32_cmpeqps:
7267   case X86::BI__builtin_ia32_cmpeqpd:
7268     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7269   case X86::BI__builtin_ia32_cmpltps:
7270   case X86::BI__builtin_ia32_cmpltpd:
7271     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7272   case X86::BI__builtin_ia32_cmpleps:
7273   case X86::BI__builtin_ia32_cmplepd:
7274     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7275   case X86::BI__builtin_ia32_cmpunordps:
7276   case X86::BI__builtin_ia32_cmpunordpd:
7277     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7278   case X86::BI__builtin_ia32_cmpneqps:
7279   case X86::BI__builtin_ia32_cmpneqpd:
7280     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7281   case X86::BI__builtin_ia32_cmpnltps:
7282   case X86::BI__builtin_ia32_cmpnltpd:
7283     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7284   case X86::BI__builtin_ia32_cmpnleps:
7285   case X86::BI__builtin_ia32_cmpnlepd:
7286     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7287   case X86::BI__builtin_ia32_cmpordps:
7288   case X86::BI__builtin_ia32_cmpordpd:
7289     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7290   case X86::BI__builtin_ia32_cmpps:
7291   case X86::BI__builtin_ia32_cmpps256:
7292   case X86::BI__builtin_ia32_cmppd:
7293   case X86::BI__builtin_ia32_cmppd256: {
7294     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7295     // If this is one of the SSE immediates, we can use native IR.
7296     if (CC < 8) {
7297       FCmpInst::Predicate Pred;
7298       switch (CC) {
7299       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7300       case 1: Pred = FCmpInst::FCMP_OLT; break;
7301       case 2: Pred = FCmpInst::FCMP_OLE; break;
7302       case 3: Pred = FCmpInst::FCMP_UNO; break;
7303       case 4: Pred = FCmpInst::FCMP_UNE; break;
7304       case 5: Pred = FCmpInst::FCMP_UGE; break;
7305       case 6: Pred = FCmpInst::FCMP_UGT; break;
7306       case 7: Pred = FCmpInst::FCMP_ORD; break;
7307       }
7308       return getVectorFCmpIR(Pred);
7309     }
7310
7311     // We can't handle immediates 8-31 with native IR; use the intrinsic.
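    // For instance (illustrative), the AVX predicate _CMP_GE_OQ is the
    // immediate 0x1d (29); immediates in the 8-31 range are not mapped to a
    // plain IR fcmp here, so such calls are lowered to the target-specific
    // llvm.x86.sse/avx cmp intrinsics selected below.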
7312 Intrinsic::ID ID; 7313 switch (BuiltinID) { 7314 default: llvm_unreachable("Unsupported intrinsic!"); 7315 case X86::BI__builtin_ia32_cmpps: 7316 ID = Intrinsic::x86_sse_cmp_ps; 7317 break; 7318 case X86::BI__builtin_ia32_cmpps256: 7319 ID = Intrinsic::x86_avx_cmp_ps_256; 7320 break; 7321 case X86::BI__builtin_ia32_cmppd: 7322 ID = Intrinsic::x86_sse2_cmp_pd; 7323 break; 7324 case X86::BI__builtin_ia32_cmppd256: 7325 ID = Intrinsic::x86_avx_cmp_pd_256; 7326 break; 7327 } 7328 7329 return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); 7330 } 7331 7332 // SSE scalar comparison intrinsics 7333 case X86::BI__builtin_ia32_cmpeqss: 7334 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0); 7335 case X86::BI__builtin_ia32_cmpltss: 7336 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1); 7337 case X86::BI__builtin_ia32_cmpless: 7338 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2); 7339 case X86::BI__builtin_ia32_cmpunordss: 7340 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3); 7341 case X86::BI__builtin_ia32_cmpneqss: 7342 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4); 7343 case X86::BI__builtin_ia32_cmpnltss: 7344 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5); 7345 case X86::BI__builtin_ia32_cmpnless: 7346 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6); 7347 case X86::BI__builtin_ia32_cmpordss: 7348 return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7); 7349 case X86::BI__builtin_ia32_cmpeqsd: 7350 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0); 7351 case X86::BI__builtin_ia32_cmpltsd: 7352 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1); 7353 case X86::BI__builtin_ia32_cmplesd: 7354 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2); 7355 case X86::BI__builtin_ia32_cmpunordsd: 7356 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3); 7357 case X86::BI__builtin_ia32_cmpneqsd: 7358 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4); 7359 case X86::BI__builtin_ia32_cmpnltsd: 7360 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5); 7361 case X86::BI__builtin_ia32_cmpnlesd: 7362 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6); 7363 case X86::BI__builtin_ia32_cmpordsd: 7364 return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7); 7365 } 7366 } 7367 7368 7369 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, 7370 const CallExpr *E) { 7371 SmallVector<Value*, 4> Ops; 7372 7373 for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) 7374 Ops.push_back(EmitScalarExpr(E->getArg(i))); 7375 7376 Intrinsic::ID ID = Intrinsic::not_intrinsic; 7377 7378 switch (BuiltinID) { 7379 default: return nullptr; 7380 7381 // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we 7382 // call __builtin_readcyclecounter. 
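  // Illustrative lowering (not part of the original comment):
  //   unsigned long long t = __builtin_ppc_get_timebase();
  // simply becomes
  //   %t = call i64 @llvm.readcyclecounter()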
7383 case PPC::BI__builtin_ppc_get_timebase: 7384 return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); 7385 7386 // vec_ld, vec_lvsl, vec_lvsr 7387 case PPC::BI__builtin_altivec_lvx: 7388 case PPC::BI__builtin_altivec_lvxl: 7389 case PPC::BI__builtin_altivec_lvebx: 7390 case PPC::BI__builtin_altivec_lvehx: 7391 case PPC::BI__builtin_altivec_lvewx: 7392 case PPC::BI__builtin_altivec_lvsl: 7393 case PPC::BI__builtin_altivec_lvsr: 7394 case PPC::BI__builtin_vsx_lxvd2x: 7395 case PPC::BI__builtin_vsx_lxvw4x: 7396 { 7397 Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy); 7398 7399 Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]); 7400 Ops.pop_back(); 7401 7402 switch (BuiltinID) { 7403 default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!"); 7404 case PPC::BI__builtin_altivec_lvx: 7405 ID = Intrinsic::ppc_altivec_lvx; 7406 break; 7407 case PPC::BI__builtin_altivec_lvxl: 7408 ID = Intrinsic::ppc_altivec_lvxl; 7409 break; 7410 case PPC::BI__builtin_altivec_lvebx: 7411 ID = Intrinsic::ppc_altivec_lvebx; 7412 break; 7413 case PPC::BI__builtin_altivec_lvehx: 7414 ID = Intrinsic::ppc_altivec_lvehx; 7415 break; 7416 case PPC::BI__builtin_altivec_lvewx: 7417 ID = Intrinsic::ppc_altivec_lvewx; 7418 break; 7419 case PPC::BI__builtin_altivec_lvsl: 7420 ID = Intrinsic::ppc_altivec_lvsl; 7421 break; 7422 case PPC::BI__builtin_altivec_lvsr: 7423 ID = Intrinsic::ppc_altivec_lvsr; 7424 break; 7425 case PPC::BI__builtin_vsx_lxvd2x: 7426 ID = Intrinsic::ppc_vsx_lxvd2x; 7427 break; 7428 case PPC::BI__builtin_vsx_lxvw4x: 7429 ID = Intrinsic::ppc_vsx_lxvw4x; 7430 break; 7431 } 7432 llvm::Function *F = CGM.getIntrinsic(ID); 7433 return Builder.CreateCall(F, Ops, ""); 7434 } 7435 7436 // vec_st 7437 case PPC::BI__builtin_altivec_stvx: 7438 case PPC::BI__builtin_altivec_stvxl: 7439 case PPC::BI__builtin_altivec_stvebx: 7440 case PPC::BI__builtin_altivec_stvehx: 7441 case PPC::BI__builtin_altivec_stvewx: 7442 case PPC::BI__builtin_vsx_stxvd2x: 7443 case PPC::BI__builtin_vsx_stxvw4x: 7444 { 7445 Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy); 7446 Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]); 7447 Ops.pop_back(); 7448 7449 switch (BuiltinID) { 7450 default: llvm_unreachable("Unsupported st intrinsic!"); 7451 case PPC::BI__builtin_altivec_stvx: 7452 ID = Intrinsic::ppc_altivec_stvx; 7453 break; 7454 case PPC::BI__builtin_altivec_stvxl: 7455 ID = Intrinsic::ppc_altivec_stvxl; 7456 break; 7457 case PPC::BI__builtin_altivec_stvebx: 7458 ID = Intrinsic::ppc_altivec_stvebx; 7459 break; 7460 case PPC::BI__builtin_altivec_stvehx: 7461 ID = Intrinsic::ppc_altivec_stvehx; 7462 break; 7463 case PPC::BI__builtin_altivec_stvewx: 7464 ID = Intrinsic::ppc_altivec_stvewx; 7465 break; 7466 case PPC::BI__builtin_vsx_stxvd2x: 7467 ID = Intrinsic::ppc_vsx_stxvd2x; 7468 break; 7469 case PPC::BI__builtin_vsx_stxvw4x: 7470 ID = Intrinsic::ppc_vsx_stxvw4x; 7471 break; 7472 } 7473 llvm::Function *F = CGM.getIntrinsic(ID); 7474 return Builder.CreateCall(F, Ops, ""); 7475 } 7476 // Square root 7477 case PPC::BI__builtin_vsx_xvsqrtsp: 7478 case PPC::BI__builtin_vsx_xvsqrtdp: { 7479 llvm::Type *ResultType = ConvertType(E->getType()); 7480 Value *X = EmitScalarExpr(E->getArg(0)); 7481 ID = Intrinsic::sqrt; 7482 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7483 return Builder.CreateCall(F, X); 7484 } 7485 // Count leading zeros 7486 case PPC::BI__builtin_altivec_vclzb: 7487 case PPC::BI__builtin_altivec_vclzh: 7488 case PPC::BI__builtin_altivec_vclzw: 7489 case PPC::BI__builtin_altivec_vclzd: { 7490 llvm::Type 
*ResultType = ConvertType(E->getType()); 7491 Value *X = EmitScalarExpr(E->getArg(0)); 7492 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7493 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 7494 return Builder.CreateCall(F, {X, Undef}); 7495 } 7496 // Copy sign 7497 case PPC::BI__builtin_vsx_xvcpsgnsp: 7498 case PPC::BI__builtin_vsx_xvcpsgndp: { 7499 llvm::Type *ResultType = ConvertType(E->getType()); 7500 Value *X = EmitScalarExpr(E->getArg(0)); 7501 Value *Y = EmitScalarExpr(E->getArg(1)); 7502 ID = Intrinsic::copysign; 7503 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7504 return Builder.CreateCall(F, {X, Y}); 7505 } 7506 // Rounding/truncation 7507 case PPC::BI__builtin_vsx_xvrspip: 7508 case PPC::BI__builtin_vsx_xvrdpip: 7509 case PPC::BI__builtin_vsx_xvrdpim: 7510 case PPC::BI__builtin_vsx_xvrspim: 7511 case PPC::BI__builtin_vsx_xvrdpi: 7512 case PPC::BI__builtin_vsx_xvrspi: 7513 case PPC::BI__builtin_vsx_xvrdpic: 7514 case PPC::BI__builtin_vsx_xvrspic: 7515 case PPC::BI__builtin_vsx_xvrdpiz: 7516 case PPC::BI__builtin_vsx_xvrspiz: { 7517 llvm::Type *ResultType = ConvertType(E->getType()); 7518 Value *X = EmitScalarExpr(E->getArg(0)); 7519 if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || 7520 BuiltinID == PPC::BI__builtin_vsx_xvrspim) 7521 ID = Intrinsic::floor; 7522 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || 7523 BuiltinID == PPC::BI__builtin_vsx_xvrspi) 7524 ID = Intrinsic::round; 7525 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || 7526 BuiltinID == PPC::BI__builtin_vsx_xvrspic) 7527 ID = Intrinsic::nearbyint; 7528 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || 7529 BuiltinID == PPC::BI__builtin_vsx_xvrspip) 7530 ID = Intrinsic::ceil; 7531 else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || 7532 BuiltinID == PPC::BI__builtin_vsx_xvrspiz) 7533 ID = Intrinsic::trunc; 7534 llvm::Function *F = CGM.getIntrinsic(ID, ResultType); 7535 return Builder.CreateCall(F, X); 7536 } 7537 7538 // Absolute value 7539 case PPC::BI__builtin_vsx_xvabsdp: 7540 case PPC::BI__builtin_vsx_xvabssp: { 7541 llvm::Type *ResultType = ConvertType(E->getType()); 7542 Value *X = EmitScalarExpr(E->getArg(0)); 7543 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7544 return Builder.CreateCall(F, X); 7545 } 7546 7547 // FMA variations 7548 case PPC::BI__builtin_vsx_xvmaddadp: 7549 case PPC::BI__builtin_vsx_xvmaddasp: 7550 case PPC::BI__builtin_vsx_xvnmaddadp: 7551 case PPC::BI__builtin_vsx_xvnmaddasp: 7552 case PPC::BI__builtin_vsx_xvmsubadp: 7553 case PPC::BI__builtin_vsx_xvmsubasp: 7554 case PPC::BI__builtin_vsx_xvnmsubadp: 7555 case PPC::BI__builtin_vsx_xvnmsubasp: { 7556 llvm::Type *ResultType = ConvertType(E->getType()); 7557 Value *X = EmitScalarExpr(E->getArg(0)); 7558 Value *Y = EmitScalarExpr(E->getArg(1)); 7559 Value *Z = EmitScalarExpr(E->getArg(2)); 7560 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7561 llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7562 switch (BuiltinID) { 7563 case PPC::BI__builtin_vsx_xvmaddadp: 7564 case PPC::BI__builtin_vsx_xvmaddasp: 7565 return Builder.CreateCall(F, {X, Y, Z}); 7566 case PPC::BI__builtin_vsx_xvnmaddadp: 7567 case PPC::BI__builtin_vsx_xvnmaddasp: 7568 return Builder.CreateFSub(Zero, 7569 Builder.CreateCall(F, {X, Y, Z}), "sub"); 7570 case PPC::BI__builtin_vsx_xvmsubadp: 7571 case PPC::BI__builtin_vsx_xvmsubasp: 7572 return Builder.CreateCall(F, 7573 {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7574 case PPC::BI__builtin_vsx_xvnmsubadp: 7575 
case PPC::BI__builtin_vsx_xvnmsubasp: 7576 Value *FsubRes = 7577 Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7578 return Builder.CreateFSub(Zero, FsubRes, "sub"); 7579 } 7580 llvm_unreachable("Unknown FMA operation"); 7581 return nullptr; // Suppress no-return warning 7582 } 7583 } 7584 } 7585 7586 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, 7587 const CallExpr *E) { 7588 switch (BuiltinID) { 7589 case AMDGPU::BI__builtin_amdgcn_div_scale: 7590 case AMDGPU::BI__builtin_amdgcn_div_scalef: { 7591 // Translate from the intrinsics's struct return to the builtin's out 7592 // argument. 7593 7594 Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3)); 7595 7596 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 7597 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 7598 llvm::Value *Z = EmitScalarExpr(E->getArg(2)); 7599 7600 llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale, 7601 X->getType()); 7602 7603 llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z}); 7604 7605 llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0); 7606 llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1); 7607 7608 llvm::Type *RealFlagType 7609 = FlagOutPtr.getPointer()->getType()->getPointerElementType(); 7610 7611 llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType); 7612 Builder.CreateStore(FlagExt, FlagOutPtr); 7613 return Result; 7614 } 7615 case AMDGPU::BI__builtin_amdgcn_div_fmas: 7616 case AMDGPU::BI__builtin_amdgcn_div_fmasf: { 7617 llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); 7618 llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); 7619 llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); 7620 llvm::Value *Src3 = EmitScalarExpr(E->getArg(3)); 7621 7622 llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas, 7623 Src0->getType()); 7624 llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3); 7625 return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool}); 7626 } 7627 case AMDGPU::BI__builtin_amdgcn_div_fixup: 7628 case AMDGPU::BI__builtin_amdgcn_div_fixupf: 7629 return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup); 7630 case AMDGPU::BI__builtin_amdgcn_trig_preop: 7631 case AMDGPU::BI__builtin_amdgcn_trig_preopf: 7632 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop); 7633 case AMDGPU::BI__builtin_amdgcn_rcp: 7634 case AMDGPU::BI__builtin_amdgcn_rcpf: 7635 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp); 7636 case AMDGPU::BI__builtin_amdgcn_rsq: 7637 case AMDGPU::BI__builtin_amdgcn_rsqf: 7638 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 7639 case AMDGPU::BI__builtin_amdgcn_rsq_clamp: 7640 case AMDGPU::BI__builtin_amdgcn_rsq_clampf: 7641 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp); 7642 case AMDGPU::BI__builtin_amdgcn_sinf: 7643 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin); 7644 case AMDGPU::BI__builtin_amdgcn_cosf: 7645 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos); 7646 case AMDGPU::BI__builtin_amdgcn_log_clampf: 7647 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp); 7648 case AMDGPU::BI__builtin_amdgcn_ldexp: 7649 case AMDGPU::BI__builtin_amdgcn_ldexpf: 7650 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 7651 case AMDGPU::BI__builtin_amdgcn_frexp_mant: 7652 case AMDGPU::BI__builtin_amdgcn_frexp_mantf: { 7653 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant); 7654 } 7655 case AMDGPU::BI__builtin_amdgcn_frexp_exp: 7656 case AMDGPU::BI__builtin_amdgcn_frexp_expf: { 7657 return emitUnaryBuiltin(*this, 
E, Intrinsic::amdgcn_frexp_exp); 7658 } 7659 case AMDGPU::BI__builtin_amdgcn_fract: 7660 case AMDGPU::BI__builtin_amdgcn_fractf: 7661 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract); 7662 case AMDGPU::BI__builtin_amdgcn_class: 7663 case AMDGPU::BI__builtin_amdgcn_classf: 7664 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class); 7665 7666 case AMDGPU::BI__builtin_amdgcn_read_exec: { 7667 CallInst *CI = cast<CallInst>( 7668 EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec")); 7669 CI->setConvergent(); 7670 return CI; 7671 } 7672 // Legacy amdgpu prefix 7673 case AMDGPU::BI__builtin_amdgpu_rsq: 7674 case AMDGPU::BI__builtin_amdgpu_rsqf: { 7675 if (getTarget().getTriple().getArch() == Triple::amdgcn) 7676 return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq); 7677 return emitUnaryBuiltin(*this, E, Intrinsic::r600_rsq); 7678 } 7679 case AMDGPU::BI__builtin_amdgpu_ldexp: 7680 case AMDGPU::BI__builtin_amdgpu_ldexpf: { 7681 if (getTarget().getTriple().getArch() == Triple::amdgcn) 7682 return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp); 7683 return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp); 7684 } 7685 7686 // amdgcn workitem 7687 case AMDGPU::BI__builtin_amdgcn_workitem_id_x: 7688 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024); 7689 case AMDGPU::BI__builtin_amdgcn_workitem_id_y: 7690 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024); 7691 case AMDGPU::BI__builtin_amdgcn_workitem_id_z: 7692 return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024); 7693 7694 // r600 workitem 7695 case AMDGPU::BI__builtin_r600_read_tidig_x: 7696 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024); 7697 case AMDGPU::BI__builtin_r600_read_tidig_y: 7698 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024); 7699 case AMDGPU::BI__builtin_r600_read_tidig_z: 7700 return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024); 7701 default: 7702 return nullptr; 7703 } 7704 } 7705 7706 /// Handle a SystemZ function in which the final argument is a pointer 7707 /// to an int that receives the post-instruction CC value. At the LLVM level 7708 /// this is represented as a function that returns a {result, cc} pair. 
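/// For example (sketch, not part of the original documentation): a builtin
/// declared as
///   vector signed short __builtin_s390_vpkshs(vector signed int,
///                                             vector signed int, int *cc)
/// is emitted as a call to the corresponding {result, i32} intrinsic;
/// element 1 of the aggregate is stored through the cc pointer and element 0
/// is returned to the caller.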
7709 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF, 7710 unsigned IntrinsicID, 7711 const CallExpr *E) { 7712 unsigned NumArgs = E->getNumArgs() - 1; 7713 SmallVector<Value *, 8> Args(NumArgs); 7714 for (unsigned I = 0; I < NumArgs; ++I) 7715 Args[I] = CGF.EmitScalarExpr(E->getArg(I)); 7716 Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs)); 7717 Value *F = CGF.CGM.getIntrinsic(IntrinsicID); 7718 Value *Call = CGF.Builder.CreateCall(F, Args); 7719 Value *CC = CGF.Builder.CreateExtractValue(Call, 1); 7720 CGF.Builder.CreateStore(CC, CCPtr); 7721 return CGF.Builder.CreateExtractValue(Call, 0); 7722 } 7723 7724 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, 7725 const CallExpr *E) { 7726 switch (BuiltinID) { 7727 case SystemZ::BI__builtin_tbegin: { 7728 Value *TDB = EmitScalarExpr(E->getArg(0)); 7729 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 7730 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin); 7731 return Builder.CreateCall(F, {TDB, Control}); 7732 } 7733 case SystemZ::BI__builtin_tbegin_nofloat: { 7734 Value *TDB = EmitScalarExpr(E->getArg(0)); 7735 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c); 7736 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat); 7737 return Builder.CreateCall(F, {TDB, Control}); 7738 } 7739 case SystemZ::BI__builtin_tbeginc: { 7740 Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy); 7741 Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08); 7742 Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc); 7743 return Builder.CreateCall(F, {TDB, Control}); 7744 } 7745 case SystemZ::BI__builtin_tabort: { 7746 Value *Data = EmitScalarExpr(E->getArg(0)); 7747 Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort); 7748 return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort")); 7749 } 7750 case SystemZ::BI__builtin_non_tx_store: { 7751 Value *Address = EmitScalarExpr(E->getArg(0)); 7752 Value *Data = EmitScalarExpr(E->getArg(1)); 7753 Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg); 7754 return Builder.CreateCall(F, {Data, Address}); 7755 } 7756 7757 // Vector builtins. Note that most vector builtins are mapped automatically 7758 // to target-specific LLVM intrinsics. The ones handled specially here can 7759 // be represented via standard LLVM IR, which is preferable to enable common 7760 // LLVM optimizations. 
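  // For example (illustrative), __builtin_s390_vpopctb below is emitted as a
  // call to the generic @llvm.ctpop.v16i8 intrinsic rather than a
  // target-specific one, so ordinary mid-level optimizations can reason
  // about it.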
7761 7762 case SystemZ::BI__builtin_s390_vpopctb: 7763 case SystemZ::BI__builtin_s390_vpopcth: 7764 case SystemZ::BI__builtin_s390_vpopctf: 7765 case SystemZ::BI__builtin_s390_vpopctg: { 7766 llvm::Type *ResultType = ConvertType(E->getType()); 7767 Value *X = EmitScalarExpr(E->getArg(0)); 7768 Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); 7769 return Builder.CreateCall(F, X); 7770 } 7771 7772 case SystemZ::BI__builtin_s390_vclzb: 7773 case SystemZ::BI__builtin_s390_vclzh: 7774 case SystemZ::BI__builtin_s390_vclzf: 7775 case SystemZ::BI__builtin_s390_vclzg: { 7776 llvm::Type *ResultType = ConvertType(E->getType()); 7777 Value *X = EmitScalarExpr(E->getArg(0)); 7778 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7779 Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType); 7780 return Builder.CreateCall(F, {X, Undef}); 7781 } 7782 7783 case SystemZ::BI__builtin_s390_vctzb: 7784 case SystemZ::BI__builtin_s390_vctzh: 7785 case SystemZ::BI__builtin_s390_vctzf: 7786 case SystemZ::BI__builtin_s390_vctzg: { 7787 llvm::Type *ResultType = ConvertType(E->getType()); 7788 Value *X = EmitScalarExpr(E->getArg(0)); 7789 Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false); 7790 Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType); 7791 return Builder.CreateCall(F, {X, Undef}); 7792 } 7793 7794 case SystemZ::BI__builtin_s390_vfsqdb: { 7795 llvm::Type *ResultType = ConvertType(E->getType()); 7796 Value *X = EmitScalarExpr(E->getArg(0)); 7797 Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType); 7798 return Builder.CreateCall(F, X); 7799 } 7800 case SystemZ::BI__builtin_s390_vfmadb: { 7801 llvm::Type *ResultType = ConvertType(E->getType()); 7802 Value *X = EmitScalarExpr(E->getArg(0)); 7803 Value *Y = EmitScalarExpr(E->getArg(1)); 7804 Value *Z = EmitScalarExpr(E->getArg(2)); 7805 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7806 return Builder.CreateCall(F, {X, Y, Z}); 7807 } 7808 case SystemZ::BI__builtin_s390_vfmsdb: { 7809 llvm::Type *ResultType = ConvertType(E->getType()); 7810 Value *X = EmitScalarExpr(E->getArg(0)); 7811 Value *Y = EmitScalarExpr(E->getArg(1)); 7812 Value *Z = EmitScalarExpr(E->getArg(2)); 7813 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7814 Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType); 7815 return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")}); 7816 } 7817 case SystemZ::BI__builtin_s390_vflpdb: { 7818 llvm::Type *ResultType = ConvertType(E->getType()); 7819 Value *X = EmitScalarExpr(E->getArg(0)); 7820 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7821 return Builder.CreateCall(F, X); 7822 } 7823 case SystemZ::BI__builtin_s390_vflndb: { 7824 llvm::Type *ResultType = ConvertType(E->getType()); 7825 Value *X = EmitScalarExpr(E->getArg(0)); 7826 Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType); 7827 Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType); 7828 return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub"); 7829 } 7830 case SystemZ::BI__builtin_s390_vfidb: { 7831 llvm::Type *ResultType = ConvertType(E->getType()); 7832 Value *X = EmitScalarExpr(E->getArg(0)); 7833 // Constant-fold the M4 and M5 mask arguments. 
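    // Illustrative mapping (derived from the switch below): vfidb with
    // M4 = 4 (IEEE-inexact suppressed) and M5 = 7 is emitted as
    // @llvm.floor.v2f64, while unsupported (M4, M5) pairs fall back to the
    // target-specific @llvm.s390.vfidb intrinsic.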
7834     llvm::APSInt M4, M5;
7835     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7836     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7837     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7838     (void)IsConstM4; (void)IsConstM5;
7839     // Check whether this instance of vfidb can be represented via an LLVM
7840     // standard intrinsic. We only support some combinations of M4 and M5.
7841     Intrinsic::ID ID = Intrinsic::not_intrinsic;
7842     switch (M4.getZExtValue()) {
7843     default: break;
7844     case 0: // IEEE-inexact exception allowed
7845       switch (M5.getZExtValue()) {
7846       default: break;
7847       case 0: ID = Intrinsic::rint; break;
7848       }
7849       break;
7850     case 4: // IEEE-inexact exception suppressed
7851       switch (M5.getZExtValue()) {
7852       default: break;
7853       case 0: ID = Intrinsic::nearbyint; break;
7854       case 1: ID = Intrinsic::round; break;
7855       case 5: ID = Intrinsic::trunc; break;
7856       case 6: ID = Intrinsic::ceil; break;
7857       case 7: ID = Intrinsic::floor; break;
7858       }
7859       break;
7860     }
7861     if (ID != Intrinsic::not_intrinsic) {
7862       Function *F = CGM.getIntrinsic(ID, ResultType);
7863       return Builder.CreateCall(F, X);
7864     }
7865     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7866     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7867     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7868     return Builder.CreateCall(F, {X, M4Value, M5Value});
7869   }
7870
7871   // Vector intrinsics that output the post-instruction CC value.
7872
7873 #define INTRINSIC_WITH_CC(NAME) \
7874   case SystemZ::BI__builtin_##NAME: \
7875     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7876
7877   INTRINSIC_WITH_CC(s390_vpkshs);
7878   INTRINSIC_WITH_CC(s390_vpksfs);
7879   INTRINSIC_WITH_CC(s390_vpksgs);
7880
7881   INTRINSIC_WITH_CC(s390_vpklshs);
7882   INTRINSIC_WITH_CC(s390_vpklsfs);
7883   INTRINSIC_WITH_CC(s390_vpklsgs);
7884
7885   INTRINSIC_WITH_CC(s390_vceqbs);
7886   INTRINSIC_WITH_CC(s390_vceqhs);
7887   INTRINSIC_WITH_CC(s390_vceqfs);
7888   INTRINSIC_WITH_CC(s390_vceqgs);
7889
7890   INTRINSIC_WITH_CC(s390_vchbs);
7891   INTRINSIC_WITH_CC(s390_vchhs);
7892   INTRINSIC_WITH_CC(s390_vchfs);
7893   INTRINSIC_WITH_CC(s390_vchgs);
7894
7895   INTRINSIC_WITH_CC(s390_vchlbs);
7896   INTRINSIC_WITH_CC(s390_vchlhs);
7897   INTRINSIC_WITH_CC(s390_vchlfs);
7898   INTRINSIC_WITH_CC(s390_vchlgs);
7899
7900   INTRINSIC_WITH_CC(s390_vfaebs);
7901   INTRINSIC_WITH_CC(s390_vfaehs);
7902   INTRINSIC_WITH_CC(s390_vfaefs);
7903
7904   INTRINSIC_WITH_CC(s390_vfaezbs);
7905   INTRINSIC_WITH_CC(s390_vfaezhs);
7906   INTRINSIC_WITH_CC(s390_vfaezfs);
7907
7908   INTRINSIC_WITH_CC(s390_vfeebs);
7909   INTRINSIC_WITH_CC(s390_vfeehs);
7910   INTRINSIC_WITH_CC(s390_vfeefs);
7911
7912   INTRINSIC_WITH_CC(s390_vfeezbs);
7913   INTRINSIC_WITH_CC(s390_vfeezhs);
7914   INTRINSIC_WITH_CC(s390_vfeezfs);
7915
7916   INTRINSIC_WITH_CC(s390_vfenebs);
7917   INTRINSIC_WITH_CC(s390_vfenehs);
7918   INTRINSIC_WITH_CC(s390_vfenefs);
7919
7920   INTRINSIC_WITH_CC(s390_vfenezbs);
7921   INTRINSIC_WITH_CC(s390_vfenezhs);
7922   INTRINSIC_WITH_CC(s390_vfenezfs);
7923
7924   INTRINSIC_WITH_CC(s390_vistrbs);
7925   INTRINSIC_WITH_CC(s390_vistrhs);
7926   INTRINSIC_WITH_CC(s390_vistrfs);
7927
7928   INTRINSIC_WITH_CC(s390_vstrcbs);
7929   INTRINSIC_WITH_CC(s390_vstrchs);
7930   INTRINSIC_WITH_CC(s390_vstrcfs);
7931
7932   INTRINSIC_WITH_CC(s390_vstrczbs);
7933   INTRINSIC_WITH_CC(s390_vstrczhs);
7934   INTRINSIC_WITH_CC(s390_vstrczfs);
7935
7936   INTRINSIC_WITH_CC(s390_vfcedbs);
7937   INTRINSIC_WITH_CC(s390_vfchdbs);
7938
INTRINSIC_WITH_CC(s390_vfchedbs); 7939 7940 INTRINSIC_WITH_CC(s390_vftcidb); 7941 7942 #undef INTRINSIC_WITH_CC 7943 7944 default: 7945 return nullptr; 7946 } 7947 } 7948 7949 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, 7950 const CallExpr *E) { 7951 auto MakeLdg = [&](unsigned IntrinsicID) { 7952 Value *Ptr = EmitScalarExpr(E->getArg(0)); 7953 AlignmentSource AlignSource; 7954 clang::CharUnits Align = 7955 getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource); 7956 return Builder.CreateCall( 7957 CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(), 7958 Ptr->getType()}), 7959 {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())}); 7960 }; 7961 7962 switch (BuiltinID) { 7963 case NVPTX::BI__nvvm_atom_add_gen_i: 7964 case NVPTX::BI__nvvm_atom_add_gen_l: 7965 case NVPTX::BI__nvvm_atom_add_gen_ll: 7966 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E); 7967 7968 case NVPTX::BI__nvvm_atom_sub_gen_i: 7969 case NVPTX::BI__nvvm_atom_sub_gen_l: 7970 case NVPTX::BI__nvvm_atom_sub_gen_ll: 7971 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E); 7972 7973 case NVPTX::BI__nvvm_atom_and_gen_i: 7974 case NVPTX::BI__nvvm_atom_and_gen_l: 7975 case NVPTX::BI__nvvm_atom_and_gen_ll: 7976 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E); 7977 7978 case NVPTX::BI__nvvm_atom_or_gen_i: 7979 case NVPTX::BI__nvvm_atom_or_gen_l: 7980 case NVPTX::BI__nvvm_atom_or_gen_ll: 7981 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E); 7982 7983 case NVPTX::BI__nvvm_atom_xor_gen_i: 7984 case NVPTX::BI__nvvm_atom_xor_gen_l: 7985 case NVPTX::BI__nvvm_atom_xor_gen_ll: 7986 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E); 7987 7988 case NVPTX::BI__nvvm_atom_xchg_gen_i: 7989 case NVPTX::BI__nvvm_atom_xchg_gen_l: 7990 case NVPTX::BI__nvvm_atom_xchg_gen_ll: 7991 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E); 7992 7993 case NVPTX::BI__nvvm_atom_max_gen_i: 7994 case NVPTX::BI__nvvm_atom_max_gen_l: 7995 case NVPTX::BI__nvvm_atom_max_gen_ll: 7996 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E); 7997 7998 case NVPTX::BI__nvvm_atom_max_gen_ui: 7999 case NVPTX::BI__nvvm_atom_max_gen_ul: 8000 case NVPTX::BI__nvvm_atom_max_gen_ull: 8001 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E); 8002 8003 case NVPTX::BI__nvvm_atom_min_gen_i: 8004 case NVPTX::BI__nvvm_atom_min_gen_l: 8005 case NVPTX::BI__nvvm_atom_min_gen_ll: 8006 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E); 8007 8008 case NVPTX::BI__nvvm_atom_min_gen_ui: 8009 case NVPTX::BI__nvvm_atom_min_gen_ul: 8010 case NVPTX::BI__nvvm_atom_min_gen_ull: 8011 return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E); 8012 8013 case NVPTX::BI__nvvm_atom_cas_gen_i: 8014 case NVPTX::BI__nvvm_atom_cas_gen_l: 8015 case NVPTX::BI__nvvm_atom_cas_gen_ll: 8016 // __nvvm_atom_cas_gen_* should return the old value rather than the 8017 // success flag. 8018 return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false); 8019 8020 case NVPTX::BI__nvvm_atom_add_gen_f: { 8021 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8022 Value *Val = EmitScalarExpr(E->getArg(1)); 8023 // atomicrmw only deals with integer arguments so we need to use 8024 // LLVM's nvvm_atomic_load_add_f32 intrinsic for that. 
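    // Illustrative IR (sketch; the exact pointer-type mangling may differ):
    //   %old = call float @llvm.nvvm.atomic.load.add.f32.p0f32(float* %ptr,
    //                                                          float %val)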
8025 Value *FnALAF32 = 8026 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType()); 8027 return Builder.CreateCall(FnALAF32, {Ptr, Val}); 8028 } 8029 8030 case NVPTX::BI__nvvm_atom_inc_gen_ui: { 8031 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8032 Value *Val = EmitScalarExpr(E->getArg(1)); 8033 Value *FnALI32 = 8034 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType()); 8035 return Builder.CreateCall(FnALI32, {Ptr, Val}); 8036 } 8037 8038 case NVPTX::BI__nvvm_atom_dec_gen_ui: { 8039 Value *Ptr = EmitScalarExpr(E->getArg(0)); 8040 Value *Val = EmitScalarExpr(E->getArg(1)); 8041 Value *FnALD32 = 8042 CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType()); 8043 return Builder.CreateCall(FnALD32, {Ptr, Val}); 8044 } 8045 8046 case NVPTX::BI__nvvm_ldg_c: 8047 case NVPTX::BI__nvvm_ldg_c2: 8048 case NVPTX::BI__nvvm_ldg_c4: 8049 case NVPTX::BI__nvvm_ldg_s: 8050 case NVPTX::BI__nvvm_ldg_s2: 8051 case NVPTX::BI__nvvm_ldg_s4: 8052 case NVPTX::BI__nvvm_ldg_i: 8053 case NVPTX::BI__nvvm_ldg_i2: 8054 case NVPTX::BI__nvvm_ldg_i4: 8055 case NVPTX::BI__nvvm_ldg_l: 8056 case NVPTX::BI__nvvm_ldg_ll: 8057 case NVPTX::BI__nvvm_ldg_ll2: 8058 case NVPTX::BI__nvvm_ldg_uc: 8059 case NVPTX::BI__nvvm_ldg_uc2: 8060 case NVPTX::BI__nvvm_ldg_uc4: 8061 case NVPTX::BI__nvvm_ldg_us: 8062 case NVPTX::BI__nvvm_ldg_us2: 8063 case NVPTX::BI__nvvm_ldg_us4: 8064 case NVPTX::BI__nvvm_ldg_ui: 8065 case NVPTX::BI__nvvm_ldg_ui2: 8066 case NVPTX::BI__nvvm_ldg_ui4: 8067 case NVPTX::BI__nvvm_ldg_ul: 8068 case NVPTX::BI__nvvm_ldg_ull: 8069 case NVPTX::BI__nvvm_ldg_ull2: 8070 // PTX Interoperability section 2.2: "For a vector with an even number of 8071 // elements, its alignment is set to number of elements times the alignment 8072 // of its member: n*alignof(t)." 8073 return MakeLdg(Intrinsic::nvvm_ldg_global_i); 8074 case NVPTX::BI__nvvm_ldg_f: 8075 case NVPTX::BI__nvvm_ldg_f2: 8076 case NVPTX::BI__nvvm_ldg_f4: 8077 case NVPTX::BI__nvvm_ldg_d: 8078 case NVPTX::BI__nvvm_ldg_d2: 8079 return MakeLdg(Intrinsic::nvvm_ldg_global_f); 8080 default: 8081 return nullptr; 8082 } 8083 } 8084 8085 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, 8086 const CallExpr *E) { 8087 switch (BuiltinID) { 8088 case WebAssembly::BI__builtin_wasm_current_memory: { 8089 llvm::Type *ResultType = ConvertType(E->getType()); 8090 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType); 8091 return Builder.CreateCall(Callee); 8092 } 8093 case WebAssembly::BI__builtin_wasm_grow_memory: { 8094 Value *X = EmitScalarExpr(E->getArg(0)); 8095 Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType()); 8096 return Builder.CreateCall(Callee, X); 8097 } 8098 8099 default: 8100 return nullptr; 8101 } 8102 } 8103