//===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit Builtin calls as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCXXABI.h"
#include "CGObjCRuntime.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/CodeGen/CGFunctionInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Intrinsics.h"
#include <sstream>

using namespace clang;
using namespace CodeGen;
using namespace llvm;

/// getBuiltinLibFunction - Given a builtin id for a function like
/// "__builtin_fabsf", return a Function* for "fabsf".
llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
                                                  unsigned BuiltinID) {
  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));

  // Get the name, skip over the __builtin_ prefix (if necessary).
  StringRef Name;
  GlobalDecl D(FD);

  // If the builtin has been declared explicitly with an assembler label,
  // use the mangled name. This differs from the plain label on platforms
  // that prefix labels.
  if (FD->hasAttr<AsmLabelAttr>())
    Name = getMangledName(D);
  else
    Name = Context.BuiltinInfo.GetName(BuiltinID) + 10;

  llvm::FunctionType *Ty =
    cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));

  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
}

/// Emit the conversions required to turn the given value into an
/// integer of the given size.
static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
                        QualType T, llvm::IntegerType *IntType) {
  V = CGF.EmitToMemory(V, T);

  if (V->getType()->isPointerTy())
    return CGF.Builder.CreatePtrToInt(V, IntType);

  assert(V->getType() == IntType);
  return V;
}

static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
                          QualType T, llvm::Type *ResultType) {
  V = CGF.EmitFromMemory(V, T);

  if (ResultType->isPointerTy())
    return CGF.Builder.CreateIntToPtr(V, ResultType);

  assert(V->getType() == ResultType);
  return V;
}
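
// Added illustration (not in the original source): when T is a pointer type,
// the value operand of a __sync_* builtin is converted with ptrtoint before
// the atomic instruction and the result converted back with inttoptr, e.g.:
//   %v = ptrtoint i8* %q to i64      ; EmitToInt
//   %r = atomicrmw xchg i64* %addr, i64 %v seq_cst
//   %p = inttoptr i64 %r to i8*      ; EmitFromInt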

/// Utility to insert an atomic instruction based on Intrinsic::ID
/// and the expression node.
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
                               llvm::AtomicRMWInst::BinOp Kind,
                               const CallExpr *E) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}

/// Utility to insert an atomic instruction based on Intrinsic::ID and
/// the expression node, where the return value is the result of the
/// operation.
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
                                   llvm::AtomicRMWInst::BinOp Kind,
                                   const CallExpr *E,
                                   Instruction::BinaryOps Op,
                                   bool Invert = false) {
  QualType T = E->getType();
  assert(E->getArg(0)->getType()->isPointerType());
  assert(CGF.getContext().hasSameUnqualifiedType(T,
                                  E->getArg(0)->getType()->getPointeeType()));
  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));

  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

  llvm::IntegerType *IntType =
    llvm::IntegerType::get(CGF.getLLVMContext(),
                           CGF.getContext().getTypeSize(T));
  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

  llvm::Value *Args[2];
  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Type *ValueType = Args[1]->getType();
  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);

  llvm::Value *Result =
      CGF.Builder.CreateAtomicRMW(Kind, Args[0], Args[1],
                                  llvm::SequentiallyConsistent);
  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
  if (Invert)
    Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
                                     llvm::ConstantInt::get(IntType, -1));
  Result = EmitFromInt(CGF, Result, T, ValueType);
  return RValue::get(Result);
}
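
// Example of the Invert path above (added): __sync_nand_and_fetch(p, v)
// returns ~(old & v), emitted roughly as:
//   %old = atomicrmw nand i32* %p, i32 %v seq_cst
//   %and = and i32 %old, %v
//   %res = xor i32 %and, -1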

/// EmitFAbs - Emit a call to @llvm.fabs().
static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
  Call->setDoesNotAccessMemory();
  return Call;
}

/// Emit the computation of the sign bit for a floating point value. Returns
/// the i1 sign bit value.
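/// For example (added sketch), a double argument is handled roughly as:
///   %i = bitcast double %x to i64
///   %sign = icmp slt i64 %i, 0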
static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
  LLVMContext &C = CGF.CGM.getLLVMContext();

  llvm::Type *Ty = V->getType();
  int Width = Ty->getPrimitiveSizeInBits();
  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
  V = CGF.Builder.CreateBitCast(V, IntTy);
  if (Ty->isPPC_FP128Ty()) {
    // The higher-order double comes first, and so we need to truncate the
    // pair to extract the overall sign. The order of the pair is the same
    // in both little- and big-endian modes.
    Width >>= 1;
    IntTy = llvm::IntegerType::get(C, Width);
    V = CGF.Builder.CreateTrunc(V, IntTy);
  }
  Value *Zero = llvm::Constant::getNullValue(IntTy);
  return CGF.Builder.CreateICmpSLT(V, Zero);
}

static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
                              const CallExpr *E, llvm::Value *calleeValue) {
  return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
                      ReturnValueSlot(), Fn);
}

/// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
/// depending on IntrinsicID.
///
/// \arg CGF The current codegen function.
/// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
/// \arg X The first argument to the llvm.*.with.overflow.*.
/// \arg Y The second argument to the llvm.*.with.overflow.*.
/// \arg Carry The carry returned by the llvm.*.with.overflow.*.
/// \returns The result (i.e. sum/product) returned by the intrinsic.
static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
                                          const llvm::Intrinsic::ID IntrinsicID,
                                          llvm::Value *X, llvm::Value *Y,
                                          llvm::Value *&Carry) {
  // Make sure we have integers of the same width.
  assert(X->getType() == Y->getType() &&
         "Arguments must be the same type. (Did you forget to make sure both "
         "arguments have the same integer width?)");

  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
  llvm::Value *Tmp = CGF.Builder.CreateCall2(Callee, X, Y);
  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
  return CGF.Builder.CreateExtractValue(Tmp, 0);
}

RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                        unsigned BuiltinID, const CallExpr *E,
                                        ReturnValueSlot ReturnValue) {
  // See if we can constant fold this builtin. If so, don't emit it at all.
  Expr::EvalResult Result;
  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
      !Result.hasSideEffects()) {
    if (Result.Val.isInt())
      return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
                                                Result.Val.getInt()));
    if (Result.Val.isFloat())
      return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
                                               Result.Val.getFloat()));
  }

  switch (BuiltinID) {
  default: break;  // Handle intrinsics and libm functions below.
  case Builtin::BI__builtin___CFStringMakeConstantString:
  case Builtin::BI__builtin___NSStringMakeConstantString:
    return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
  case Builtin::BI__builtin_stdarg_start:
  case Builtin::BI__builtin_va_start:
  case Builtin::BI__va_start:
  case Builtin::BI__builtin_va_end: {
    Value *ArgValue = (BuiltinID == Builtin::BI__va_start)
                          ? EmitScalarExpr(E->getArg(0))
                          : EmitVAListRef(E->getArg(0));
    llvm::Type *DestType = Int8PtrTy;
    if (ArgValue->getType() != DestType)
      ArgValue = Builder.CreateBitCast(ArgValue, DestType,
                                       ArgValue->getName().data());

    Intrinsic::ID inst = (BuiltinID == Builtin::BI__builtin_va_end) ?
      Intrinsic::vaend : Intrinsic::vastart;
    return RValue::get(Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue));
  }
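  // Added illustration of the case above: __builtin_va_start(ap, last)
  // becomes roughly
  //   %ap.cast = bitcast <va_list type>* %ap to i8*
  //   call void @llvm.va_start(i8* %ap.cast)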
  case Builtin::BI__builtin_va_copy: {
    Value *DstPtr = EmitVAListRef(E->getArg(0));
    Value *SrcPtr = EmitVAListRef(E->getArg(1));

    llvm::Type *Type = Int8PtrTy;

    DstPtr = Builder.CreateBitCast(DstPtr, Type);
    SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
    return RValue::get(Builder.CreateCall2(CGM.getIntrinsic(Intrinsic::vacopy),
                                           DstPtr, SrcPtr));
  }
  case Builtin::BI__builtin_abs:
  case Builtin::BI__builtin_labs:
  case Builtin::BI__builtin_llabs: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
    Value *CmpResult =
      Builder.CreateICmpSGE(ArgValue,
                            llvm::Constant::getNullValue(ArgValue->getType()),
                            "abscond");
    Value *Result =
      Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");

    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fabs:
  case Builtin::BI__builtin_fabsf:
  case Builtin::BI__builtin_fabsl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Result = EmitFAbs(*this, Arg1);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_fmod:
  case Builtin::BI__builtin_fmodf:
  case Builtin::BI__builtin_fmodl: {
    Value *Arg1 = EmitScalarExpr(E->getArg(0));
    Value *Arg2 = EmitScalarExpr(E->getArg(1));
    Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_conj:
  case Builtin::BI__builtin_conjf:
  case Builtin::BI__builtin_conjl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    Value *Real = ComplexVal.first;
    Value *Imag = ComplexVal.second;
    Value *Zero =
      Imag->getType()->isFPOrFPVectorTy()
        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
        : llvm::Constant::getNullValue(Imag->getType());

    Imag = Builder.CreateFSub(Zero, Imag, "sub");
    return RValue::getComplex(std::make_pair(Real, Imag));
  }
  case Builtin::BI__builtin_creal:
  case Builtin::BI__builtin_crealf:
  case Builtin::BI__builtin_creall:
  case Builtin::BIcreal:
  case Builtin::BIcrealf:
  case Builtin::BIcreall: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.first);
  }

  case Builtin::BI__builtin_cimag:
  case Builtin::BI__builtin_cimagf:
  case Builtin::BI__builtin_cimagl:
  case Builtin::BIcimag:
  case Builtin::BIcimagf:
  case Builtin::BIcimagl: {
    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
    return RValue::get(ComplexVal.second);
  }

  case Builtin::BI__builtin_ctzs:
  case Builtin::BI__builtin_ctz:
  case Builtin::BI__builtin_ctzl:
  case Builtin::BI__builtin_ctzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_clzs:
  case Builtin::BI__builtin_clz:
  case Builtin::BI__builtin_clzl:
  case Builtin::BI__builtin_clzll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
    Value *Result = Builder.CreateCall2(F, ArgValue, ZeroUndef);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_ffs:
  case Builtin::BI__builtin_ffsl:
  case Builtin::BI__builtin_ffsll: {
    // ffs(x) -> x ? cttz(x) + 1 : 0
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateAdd(Builder.CreateCall2(F, ArgValue,
                                                       Builder.getTrue()),
                                   llvm::ConstantInt::get(ArgType, 1));
    Value *Zero = llvm::Constant::getNullValue(ArgType);
    Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
    Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_parity:
  case Builtin::BI__builtin_parityl:
  case Builtin::BI__builtin_parityll: {
    // parity(x) -> ctpop(x) & 1
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Tmp = Builder.CreateCall(F, ArgValue);
    Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_popcount:
  case Builtin::BI__builtin_popcountl:
  case Builtin::BI__builtin_popcountll: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));

    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

    llvm::Type *ResultType = ConvertType(E->getType());
    Value *Result = Builder.CreateCall(F, ArgValue);
    if (Result->getType() != ResultType)
      Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
                                     "cast");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_expect: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();

    Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
    // Don't generate llvm.expect on -O0 as the backend won't use it for
    // anything.
    // Note, we still IRGen ExpectedValue because it could have side-effects.
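    // For instance (added sketch): "if (__builtin_expect(err, 0))" emits
    //   %expval = call i32 @llvm.expect.i32(i32 %err, i32 0)
    // from which the lower-expect pass derives branch weights.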
    if (CGM.getCodeGenOpts().OptimizationLevel == 0)
      return RValue::get(ArgValue);

    Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
    Value *Result = Builder.CreateCall2(FnExpect, ArgValue, ExpectedValue,
                                        "expval");
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_assume_aligned: {
    Value *PtrValue = EmitScalarExpr(E->getArg(0));
    Value *OffsetValue =
      (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;

    Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
    ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
    unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();

    EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
    return RValue::get(PtrValue);
  }
  case Builtin::BI__assume:
  case Builtin::BI__builtin_assume: {
    if (E->getArg(0)->HasSideEffects(getContext()))
      return RValue::get(nullptr);

    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
    return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
  }
  case Builtin::BI__builtin_bswap16:
  case Builtin::BI__builtin_bswap32:
  case Builtin::BI__builtin_bswap64: {
    Value *ArgValue = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = ArgValue->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::bswap, ArgType);
    return RValue::get(Builder.CreateCall(F, ArgValue));
  }
  case Builtin::BI__builtin_object_size: {
    // We rely on constant folding to deal with expressions with side effects.
    assert(!E->getArg(0)->HasSideEffects(getContext()) &&
           "should have been constant folded");

    // We pass this builtin onto the optimizer so that it can
    // figure out the object size in more complex cases.
    llvm::Type *ResType = ConvertType(E->getType());

    // LLVM only supports 0 and 2, make sure that we pass along that
    // as a boolean.
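    // (Added note: the (val & 0x2) >> 1 below maps types 0/1 to false, the
    // maximum remaining size, and types 2/3 to true, the minimum; e.g.
    // __builtin_object_size(p, 2) yields roughly
    //   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 true).)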
    Value *Ty = EmitScalarExpr(E->getArg(1));
    ConstantInt *CI = dyn_cast<ConstantInt>(Ty);
    assert(CI);
    uint64_t val = CI->getZExtValue();
    CI = ConstantInt::get(Builder.getInt1Ty(), (val & 0x2) >> 1);
    // FIXME: Get right address space.
    llvm::Type *Tys[] = { ResType, Builder.getInt8PtrTy(0) };
    Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
    return RValue::get(Builder.CreateCall2(F, EmitScalarExpr(E->getArg(0)),CI));
  }
  case Builtin::BI__builtin_prefetch: {
    Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
    // FIXME: Technically these constants should be of type 'int', yes?
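    // E.g. (added) __builtin_prefetch(p, 0, 3) becomes
    //   call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)
    // where the trailing 1 marks a data rather than instruction prefetch.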
    RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
      llvm::ConstantInt::get(Int32Ty, 0);
    Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
      llvm::ConstantInt::get(Int32Ty, 3);
    Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return RValue::get(Builder.CreateCall4(F, Address, RW, Locality, Data));
  }
  case Builtin::BI__builtin_readcyclecounter: {
    Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin___clear_cache: {
    Value *Begin = EmitScalarExpr(E->getArg(0));
    Value *End = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
    return RValue::get(Builder.CreateCall2(F, Begin, End));
  }
  case Builtin::BI__builtin_trap: {
    Value *F = CGM.getIntrinsic(Intrinsic::trap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__debugbreak: {
    Value *F = CGM.getIntrinsic(Intrinsic::debugtrap);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_unreachable: {
    if (SanOpts.has(SanitizerKind::Unreachable)) {
      SanitizerScope SanScope(this);
      EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
                               SanitizerKind::Unreachable),
                "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
                None);
    } else
      Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("unreachable.cont"));

    return RValue::get(nullptr);
  }

  case Builtin::BI__builtin_powi:
  case Builtin::BI__builtin_powif:
  case Builtin::BI__builtin_powil: {
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BI__builtin_isgreater:
  case Builtin::BI__builtin_isgreaterequal:
  case Builtin::BI__builtin_isless:
  case Builtin::BI__builtin_islessequal:
  case Builtin::BI__builtin_islessgreater:
  case Builtin::BI__builtin_isunordered: {
    // Ordered comparisons: we know the arguments to these are matching scalar
    // floating point values.
    Value *LHS = EmitScalarExpr(E->getArg(0));
    Value *RHS = EmitScalarExpr(E->getArg(1));

    switch (BuiltinID) {
    default: llvm_unreachable("Unknown ordered comparison");
    case Builtin::BI__builtin_isgreater:
      LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isgreaterequal:
      LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isless:
      LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessequal:
      LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_islessgreater:
      LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
      break;
    case Builtin::BI__builtin_isunordered:
      LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
      break;
    }
    // ZExt bool to int type.
    return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_isnan: {
    Value *V = EmitScalarExpr(E->getArg(0));
    V = Builder.CreateFCmpUNO(V, V, "cmp");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf: {
    // isinf(x) --> fabs(x) == infinity
    Value *V = EmitScalarExpr(E->getArg(0));
    V = EmitFAbs(*this, V);

    V = Builder.CreateFCmpOEQ(V, ConstantFP::getInfinity(V->getType()),"isinf");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isinf_sign: {
    // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
    Value *Arg = EmitScalarExpr(E->getArg(0));
    Value *AbsArg = EmitFAbs(*this, Arg);
    Value *IsInf = Builder.CreateFCmpOEQ(
        AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
    Value *IsNeg = EmitSignBit(*this, Arg);

    llvm::Type *IntTy = ConvertType(E->getType());
    Value *Zero = Constant::getNullValue(IntTy);
    Value *One = ConstantInt::get(IntTy, 1);
    Value *NegativeOne = ConstantInt::get(IntTy, -1);
    Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
    Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
    return RValue::get(Result);
  }

  case Builtin::BI__builtin_isnormal: {
    // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsLessThanInf =
      Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
    APFloat Smallest = APFloat::getSmallestNormalized(
                   getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
    V = Builder.CreateAnd(V, IsNormal, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_isfinite: {
    // isfinite(x) --> x == x && fabs(x) != infinity;
    Value *V = EmitScalarExpr(E->getArg(0));
    Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");

    Value *Abs = EmitFAbs(*this, V);
    Value *IsNotInf =
      Builder.CreateFCmpUNE(Abs, ConstantFP::getInfinity(V->getType()),"isinf");

    V = Builder.CreateAnd(Eq, IsNotInf, "and");
    return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
  }

  case Builtin::BI__builtin_fpclassify: {
    Value *V = EmitScalarExpr(E->getArg(5));
    llvm::Type *Ty = ConvertType(E->getArg(5)->getType());

    // Create Result
    BasicBlock *Begin = Builder.GetInsertBlock();
    BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
    Builder.SetInsertPoint(End);
    PHINode *Result =
      Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
                        "fpclassify_result");

    // if (V==0) return FP_ZERO
    Builder.SetInsertPoint(Begin);
    Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
                                          "iszero");
    Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
    BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
    Builder.CreateCondBr(IsZero, End, NotZero);
    Result->addIncoming(ZeroLiteral, Begin);

    // if (V != V) return FP_NAN
    Builder.SetInsertPoint(NotZero);
    Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
    Value *NanLiteral = EmitScalarExpr(E->getArg(0));
    BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
    Builder.CreateCondBr(IsNan, End, NotNan);
    Result->addIncoming(NanLiteral, NotZero);

    // if (fabs(V) == infinity) return FP_INFINITY
    Builder.SetInsertPoint(NotNan);
    Value *VAbs = EmitFAbs(*this, V);
    Value *IsInf =
      Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
                            "isinf");
    Value *InfLiteral = EmitScalarExpr(E->getArg(1));
    BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
    Builder.CreateCondBr(IsInf, End, NotInf);
    Result->addIncoming(InfLiteral, NotNan);

    // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
    Builder.SetInsertPoint(NotInf);
    APFloat Smallest = APFloat::getSmallestNormalized(
        getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
    Value *IsNormal =
      Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
                            "isnormal");
    Value *NormalResult =
      Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
                           EmitScalarExpr(E->getArg(3)));
    Builder.CreateBr(End);
    Result->addIncoming(NormalResult, NotInf);

    // return Result
    Builder.SetInsertPoint(End);
    return RValue::get(Result);
  }

  case Builtin::BIalloca:
  case Builtin::BI_alloca:
  case Builtin::BI__builtin_alloca: {
    Value *Size = EmitScalarExpr(E->getArg(0));
    return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
  }
  case Builtin::BIbzero:
  case Builtin::BI__builtin_bzero: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *SizeVal = EmitScalarExpr(E->getArg(1));
    Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal,
                         Dest.second, false);
    return RValue::get(Dest.first);
  }
"cmp"); 655 Value *NanLiteral = EmitScalarExpr(E->getArg(0)); 656 BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn); 657 Builder.CreateCondBr(IsNan, End, NotNan); 658 Result->addIncoming(NanLiteral, NotZero); 659 660 // if (fabs(V) == infinity) return FP_INFINITY 661 Builder.SetInsertPoint(NotNan); 662 Value *VAbs = EmitFAbs(*this, V); 663 Value *IsInf = 664 Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()), 665 "isinf"); 666 Value *InfLiteral = EmitScalarExpr(E->getArg(1)); 667 BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn); 668 Builder.CreateCondBr(IsInf, End, NotInf); 669 Result->addIncoming(InfLiteral, NotNan); 670 671 // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL 672 Builder.SetInsertPoint(NotInf); 673 APFloat Smallest = APFloat::getSmallestNormalized( 674 getContext().getFloatTypeSemantics(E->getArg(5)->getType())); 675 Value *IsNormal = 676 Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest), 677 "isnormal"); 678 Value *NormalResult = 679 Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)), 680 EmitScalarExpr(E->getArg(3))); 681 Builder.CreateBr(End); 682 Result->addIncoming(NormalResult, NotInf); 683 684 // return Result 685 Builder.SetInsertPoint(End); 686 return RValue::get(Result); 687 } 688 689 case Builtin::BIalloca: 690 case Builtin::BI_alloca: 691 case Builtin::BI__builtin_alloca: { 692 Value *Size = EmitScalarExpr(E->getArg(0)); 693 return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size)); 694 } 695 case Builtin::BIbzero: 696 case Builtin::BI__builtin_bzero: { 697 std::pair<llvm::Value*, unsigned> Dest = 698 EmitPointerWithAlignment(E->getArg(0)); 699 Value *SizeVal = EmitScalarExpr(E->getArg(1)); 700 Builder.CreateMemSet(Dest.first, Builder.getInt8(0), SizeVal, 701 Dest.second, false); 702 return RValue::get(Dest.first); 703 } 704 case Builtin::BImemcpy: 705 case Builtin::BI__builtin_memcpy: { 706 std::pair<llvm::Value*, unsigned> Dest = 707 EmitPointerWithAlignment(E->getArg(0)); 708 std::pair<llvm::Value*, unsigned> Src = 709 EmitPointerWithAlignment(E->getArg(1)); 710 Value *SizeVal = EmitScalarExpr(E->getArg(2)); 711 unsigned Align = std::min(Dest.second, Src.second); 712 Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false); 713 return RValue::get(Dest.first); 714 } 715 716 case Builtin::BI__builtin___memcpy_chk: { 717 // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. 
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemCpy(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BI__builtin_objc_memmove_collectable: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *SrcAddr = EmitScalarExpr(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
                                                  Address, SrcAddr, SizeVal);
    return RValue::get(Address);
  }

  case Builtin::BI__builtin___memmove_chk: {
    // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }

  case Builtin::BImemmove:
  case Builtin::BI__builtin_memmove: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    std::pair<llvm::Value*, unsigned> Src =
        EmitPointerWithAlignment(E->getArg(1));
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    unsigned Align = std::min(Dest.second, Src.second);
    Builder.CreateMemMove(Dest.first, Src.first, SizeVal, Align, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BImemset:
  case Builtin::BI__builtin_memset: {
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = EmitScalarExpr(E->getArg(2));
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin___memset_chk: {
    // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
    llvm::APSInt Size, DstSize;
    if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
        !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
      break;
    if (Size.ugt(DstSize))
      break;
    std::pair<llvm::Value*, unsigned> Dest =
        EmitPointerWithAlignment(E->getArg(0));
    Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
                                         Builder.getInt8Ty());
    Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
    Builder.CreateMemSet(Dest.first, ByteVal, SizeVal, Dest.second, false);
    return RValue::get(Dest.first);
  }
  case Builtin::BI__builtin_dwarf_cfa: {
    // The offset in bytes from the first argument to the CFA.
    //
    // Why on earth is this in the frontend?  Is there any reason at
    // all that the backend can't reasonably determine this while
    // lowering llvm.eh.dwarf.cfa()?
    //
    // TODO: If there's a satisfactory reason, add a target hook for
    // this instead of hard-coding 0, which is correct for most targets.
    int32_t Offset = 0;

    Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
    return RValue::get(Builder.CreateCall(F,
                                      llvm::ConstantInt::get(Int32Ty, Offset)));
  }
  case Builtin::BI__builtin_return_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_frame_address: {
    Value *Depth = EmitScalarExpr(E->getArg(0));
    Depth = Builder.CreateIntCast(Depth, Int32Ty, false);
    Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
    return RValue::get(Builder.CreateCall(F, Depth));
  }
  case Builtin::BI__builtin_extract_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_frob_return_addr: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
    return RValue::get(Result);
  }
  case Builtin::BI__builtin_dwarf_sp_column: {
    llvm::IntegerType *Ty
      = cast<llvm::IntegerType>(ConvertType(E->getType()));
    int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
    if (Column == -1) {
      CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
      return RValue::get(llvm::UndefValue::get(Ty));
    }
    return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
  }
  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
      CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
    return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_eh_return: {
    Value *Int = EmitScalarExpr(E->getArg(0));
    Value *Ptr = EmitScalarExpr(E->getArg(1));

    llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
    assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
           "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
    Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
                                  ? Intrinsic::eh_return_i32
                                  : Intrinsic::eh_return_i64);
    Builder.CreateCall2(F, Int, Ptr);
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("builtin_eh_return.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__builtin_unwind_init: {
    Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
    return RValue::get(Builder.CreateCall(F));
  }
  case Builtin::BI__builtin_extend_pointer: {
    // Extends a pointer to the size of an _Unwind_Word, which is
    // uint64_t on all platforms.  Generally this gets poked into a
    // register and eventually used as an address, so if the
    // addressing registers are wider than pointers and the platform
    // doesn't implicitly ignore high-order bits when doing
    // addressing, we need to make sure we zext / sext based on
    // the platform's expectations.
    //
    // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html

    // Cast the pointer to intptr_t.
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");

    // If that's 64 bits, we're done.
    if (IntPtrTy->getBitWidth() == 64)
      return RValue::get(Result);

    // Otherwise, ask the codegen data what to do.
    if (getTargetHooks().extendPointerWithSExt())
      return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
    else
      return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
  }
  case Builtin::BI__builtin_setjmp: {
    // Buffer is a void**.
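    // Layout sketch (added; assuming the usual five-word jmp buffer):
    // buf[0] holds the frame address (stored below), buf[1] is reserved for
    // the resume address (filled in when the intrinsic is lowered), and
    // buf[2] holds the stack pointer for llvm.stacksave.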
    Value *Buf = EmitScalarExpr(E->getArg(0));

    // Store the frame pointer to the setjmp buffer.
    Value *FrameAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
                         ConstantInt::get(Int32Ty, 0));
    Builder.CreateStore(FrameAddr, Buf);

    // Store the stack pointer to the setjmp buffer.
    Value *StackAddr =
      Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
    Value *StackSaveSlot =
      Builder.CreateGEP(Buf, ConstantInt::get(Int32Ty, 2));
    Builder.CreateStore(StackAddr, StackSaveSlot);

    // Call LLVM's EH setjmp, which is lightweight.
    Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
    return RValue::get(Builder.CreateCall(F, Buf));
  }
  case Builtin::BI__builtin_longjmp: {
    Value *Buf = EmitScalarExpr(E->getArg(0));
    Buf = Builder.CreateBitCast(Buf, Int8PtrTy);

    // Call LLVM's EH longjmp, which is lightweight.
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);

    // longjmp doesn't return; mark this as unreachable.
    Builder.CreateUnreachable();

    // We do need to preserve an insertion point.
    EmitBlock(createBasicBlock("longjmp.cont"));

    return RValue::get(nullptr);
  }
  case Builtin::BI__sync_fetch_and_add:
  case Builtin::BI__sync_fetch_and_sub:
  case Builtin::BI__sync_fetch_and_or:
  case Builtin::BI__sync_fetch_and_and:
  case Builtin::BI__sync_fetch_and_xor:
  case Builtin::BI__sync_fetch_and_nand:
  case Builtin::BI__sync_add_and_fetch:
  case Builtin::BI__sync_sub_and_fetch:
  case Builtin::BI__sync_and_and_fetch:
  case Builtin::BI__sync_or_and_fetch:
  case Builtin::BI__sync_xor_and_fetch:
  case Builtin::BI__sync_nand_and_fetch:
  case Builtin::BI__sync_val_compare_and_swap:
  case Builtin::BI__sync_bool_compare_and_swap:
  case Builtin::BI__sync_lock_test_and_set:
  case Builtin::BI__sync_lock_release:
  case Builtin::BI__sync_swap:
    llvm_unreachable("Shouldn't make it through sema");
  case Builtin::BI__sync_fetch_and_add_1:
  case Builtin::BI__sync_fetch_and_add_2:
  case Builtin::BI__sync_fetch_and_add_4:
  case Builtin::BI__sync_fetch_and_add_8:
  case Builtin::BI__sync_fetch_and_add_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
  case Builtin::BI__sync_fetch_and_sub_1:
  case Builtin::BI__sync_fetch_and_sub_2:
  case Builtin::BI__sync_fetch_and_sub_4:
  case Builtin::BI__sync_fetch_and_sub_8:
  case Builtin::BI__sync_fetch_and_sub_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
  case Builtin::BI__sync_fetch_and_or_1:
  case Builtin::BI__sync_fetch_and_or_2:
  case Builtin::BI__sync_fetch_and_or_4:
  case Builtin::BI__sync_fetch_and_or_8:
  case Builtin::BI__sync_fetch_and_or_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
  case Builtin::BI__sync_fetch_and_and_1:
  case Builtin::BI__sync_fetch_and_and_2:
  case Builtin::BI__sync_fetch_and_and_4:
  case Builtin::BI__sync_fetch_and_and_8:
  case Builtin::BI__sync_fetch_and_and_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
  case Builtin::BI__sync_fetch_and_xor_1:
  case Builtin::BI__sync_fetch_and_xor_2:
  case Builtin::BI__sync_fetch_and_xor_4:
  case Builtin::BI__sync_fetch_and_xor_8:
  case Builtin::BI__sync_fetch_and_xor_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
  case Builtin::BI__sync_fetch_and_nand_1:
  case Builtin::BI__sync_fetch_and_nand_2:
  case Builtin::BI__sync_fetch_and_nand_4:
  case Builtin::BI__sync_fetch_and_nand_8:
  case Builtin::BI__sync_fetch_and_nand_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);

  // Clang extensions: not overloaded yet.
  case Builtin::BI__sync_fetch_and_min:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
  case Builtin::BI__sync_fetch_and_max:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
  case Builtin::BI__sync_fetch_and_umin:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
  case Builtin::BI__sync_fetch_and_umax:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);

  case Builtin::BI__sync_add_and_fetch_1:
  case Builtin::BI__sync_add_and_fetch_2:
  case Builtin::BI__sync_add_and_fetch_4:
  case Builtin::BI__sync_add_and_fetch_8:
  case Builtin::BI__sync_add_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
                                llvm::Instruction::Add);
  case Builtin::BI__sync_sub_and_fetch_1:
  case Builtin::BI__sync_sub_and_fetch_2:
  case Builtin::BI__sync_sub_and_fetch_4:
  case Builtin::BI__sync_sub_and_fetch_8:
  case Builtin::BI__sync_sub_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
                                llvm::Instruction::Sub);
  case Builtin::BI__sync_and_and_fetch_1:
  case Builtin::BI__sync_and_and_fetch_2:
  case Builtin::BI__sync_and_and_fetch_4:
  case Builtin::BI__sync_and_and_fetch_8:
  case Builtin::BI__sync_and_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
                                llvm::Instruction::And);
  case Builtin::BI__sync_or_and_fetch_1:
  case Builtin::BI__sync_or_and_fetch_2:
  case Builtin::BI__sync_or_and_fetch_4:
  case Builtin::BI__sync_or_and_fetch_8:
  case Builtin::BI__sync_or_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
                                llvm::Instruction::Or);
  case Builtin::BI__sync_xor_and_fetch_1:
  case Builtin::BI__sync_xor_and_fetch_2:
  case Builtin::BI__sync_xor_and_fetch_4:
  case Builtin::BI__sync_xor_and_fetch_8:
  case Builtin::BI__sync_xor_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
                                llvm::Instruction::Xor);
  case Builtin::BI__sync_nand_and_fetch_1:
  case Builtin::BI__sync_nand_and_fetch_2:
  case Builtin::BI__sync_nand_and_fetch_4:
  case Builtin::BI__sync_nand_and_fetch_8:
  case Builtin::BI__sync_nand_and_fetch_16:
    return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
                                llvm::Instruction::And, true);

  case Builtin::BI__sync_val_compare_and_swap_1:
  case Builtin::BI__sync_val_compare_and_swap_2:
  case Builtin::BI__sync_val_compare_and_swap_4:
  case Builtin::BI__sync_val_compare_and_swap_8:
  case Builtin::BI__sync_val_compare_and_swap_16: {
    QualType T = E->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitScalarExpr(E->getArg(1));
    llvm::Type *ValueType = Args[1]->getType();
    Args[1] = EmitToInt(*this, Args[1], T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Result = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                                llvm::SequentiallyConsistent,
                                                llvm::SequentiallyConsistent);
    Result = Builder.CreateExtractValue(Result, 0);
    Result = EmitFromInt(*this, Result, T, ValueType);
    return RValue::get(Result);
  }
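  // Illustrative IR for the 4-byte variant of the case above (added):
  //   %pair = cmpxchg i32* %ptr, i32 %expected, i32 %new seq_cst seq_cst
  //   %old  = extractvalue { i32, i1 } %pair, 0   ; the builtin's result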

  case Builtin::BI__sync_bool_compare_and_swap_1:
  case Builtin::BI__sync_bool_compare_and_swap_2:
  case Builtin::BI__sync_bool_compare_and_swap_4:
  case Builtin::BI__sync_bool_compare_and_swap_8:
  case Builtin::BI__sync_bool_compare_and_swap_16: {
    QualType T = E->getArg(1)->getType();
    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();

    llvm::IntegerType *IntType =
      llvm::IntegerType::get(getLLVMContext(),
                             getContext().getTypeSize(T));
    llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);

    Value *Args[3];
    Args[0] = Builder.CreateBitCast(DestPtr, IntPtrType);
    Args[1] = EmitToInt(*this, EmitScalarExpr(E->getArg(1)), T, IntType);
    Args[2] = EmitToInt(*this, EmitScalarExpr(E->getArg(2)), T, IntType);

    Value *Pair = Builder.CreateAtomicCmpXchg(Args[0], Args[1], Args[2],
                                              llvm::SequentiallyConsistent,
                                              llvm::SequentiallyConsistent);
    Value *Result = Builder.CreateExtractValue(Pair, 1);
    // zext bool to int.
    Result = Builder.CreateZExt(Result, ConvertType(E->getType()));
    return RValue::get(Result);
  }

  case Builtin::BI__sync_swap_1:
  case Builtin::BI__sync_swap_2:
  case Builtin::BI__sync_swap_4:
  case Builtin::BI__sync_swap_8:
  case Builtin::BI__sync_swap_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_test_and_set_1:
  case Builtin::BI__sync_lock_test_and_set_2:
  case Builtin::BI__sync_lock_test_and_set_4:
  case Builtin::BI__sync_lock_test_and_set_8:
  case Builtin::BI__sync_lock_test_and_set_16:
    return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);

  case Builtin::BI__sync_lock_release_1:
  case Builtin::BI__sync_lock_release_2:
  case Builtin::BI__sync_lock_release_4:
  case Builtin::BI__sync_lock_release_8:
  case Builtin::BI__sync_lock_release_16: {
    Value *Ptr = EmitScalarExpr(E->getArg(0));
    QualType ElTy = E->getArg(0)->getType()->getPointeeType();
    CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
    llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
                                             StoreSize.getQuantity() * 8);
    Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
    llvm::StoreInst *Store =
      Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
    Store->setAlignment(StoreSize.getQuantity());
    Store->setAtomic(llvm::Release);
    return RValue::get(nullptr);
  }

  case Builtin::BI__sync_synchronize: {
    // We assume this is supposed to correspond to a C++0x-style
    // sequentially-consistent fence (i.e. this is only usable for
    // synchronization, not device I/O or anything like that). This intrinsic
    // is really badly designed in the sense that in theory, there isn't
    // any way to safely use it... but in practice, it mostly works
    // to use it with non-atomic loads and stores to get acquire/release
    // semantics.
    Builder.CreateFence(llvm::SequentiallyConsistent);
    return RValue::get(nullptr);
  }

  case Builtin::BI__c11_atomic_is_lock_free:
  case Builtin::BI__atomic_is_lock_free: {
    // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
    // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
    // _Atomic(T) is always properly-aligned.
    const char *LibCallName = "__atomic_is_lock_free";
    CallArgList Args;
    Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
             getContext().getSizeType());
    if (BuiltinID == Builtin::BI__atomic_is_lock_free)
      Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
               getContext().VoidPtrTy);
    else
      Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
               getContext().VoidPtrTy);
    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(E->getType(), Args,
                                               FunctionType::ExtInfo(),
                                               RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
  }

  case Builtin::BI__atomic_test_and_set: {
    // Look at the argument type to determine whether this is a volatile
    // operation. The parameter type is always volatile.
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(1);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      AtomicRMWInst *Result = nullptr;
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Monotonic);
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Acquire);
        break;
      case 3:  // memory_order_release
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::Release);
        break;
      case 4:  // memory_order_acq_rel
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::AcquireRelease);
        break;
      case 5:  // memory_order_seq_cst
        Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                         Ptr, NewVal,
                                         llvm::SequentiallyConsistent);
        break;
      }
      Result->setVolatile(Volatile);
      return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
    }

    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[5] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("acquire", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("acqrel", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[5] = {
      llvm::Monotonic, llvm::Acquire, llvm::Release,
      llvm::AcquireRelease, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    Builder.SetInsertPoint(ContBB);
    PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");

    for (unsigned i = 0; i < 5; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
                                                   Ptr, NewVal, Orders[i]);
      RMW->setVolatile(Volatile);
      Result->addIncoming(RMW, BBs[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(1), BBs[1]);
    SI->addCase(Builder.getInt32(2), BBs[1]);
    SI->addCase(Builder.getInt32(3), BBs[2]);
    SI->addCase(Builder.getInt32(4), BBs[3]);
    SI->addCase(Builder.getInt32(5), BBs[4]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
  }

  case Builtin::BI__atomic_clear: {
    QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
    bool Volatile =
        PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();

    Value *Ptr = EmitScalarExpr(E->getArg(0));
    unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
    Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
    Value *NewVal = Builder.getInt8(0);
    Value *Order = EmitScalarExpr(E->getArg(1));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        Store->setOrdering(llvm::Monotonic);
        break;
      case 3:  // memory_order_release
        Store->setOrdering(llvm::Release);
        break;
      case 5:  // memory_order_seq_cst
        Store->setOrdering(llvm::SequentiallyConsistent);
        break;
      }
      return RValue::get(nullptr);
    }

    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    llvm::BasicBlock *BBs[3] = {
      createBasicBlock("monotonic", CurFn),
      createBasicBlock("release", CurFn),
      createBasicBlock("seqcst", CurFn)
    };
    llvm::AtomicOrdering Orders[3] = {
      llvm::Monotonic, llvm::Release, llvm::SequentiallyConsistent
    };

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);

    for (unsigned i = 0; i < 3; ++i) {
      Builder.SetInsertPoint(BBs[i]);
      StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
      Store->setAlignment(1);
      Store->setOrdering(Orders[i]);
      Builder.CreateBr(ContBB);
    }

    SI->addCase(Builder.getInt32(0), BBs[0]);
    SI->addCase(Builder.getInt32(3), BBs[1]);
    SI->addCase(Builder.getInt32(5), BBs[2]);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }
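
  // Example for the fence builtins below (added): with a constant order,
  // __c11_atomic_thread_fence(memory_order_acquire) emits the single
  // instruction "fence acquire".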

  case Builtin::BI__atomic_thread_fence:
  case Builtin::BI__atomic_signal_fence:
  case Builtin::BI__c11_atomic_thread_fence:
  case Builtin::BI__c11_atomic_signal_fence: {
    llvm::SynchronizationScope Scope;
    if (BuiltinID == Builtin::BI__atomic_signal_fence ||
        BuiltinID == Builtin::BI__c11_atomic_signal_fence)
      Scope = llvm::SingleThread;
    else
      Scope = llvm::CrossThread;
    Value *Order = EmitScalarExpr(E->getArg(0));
    if (isa<llvm::ConstantInt>(Order)) {
      int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
      switch (ord) {
      case 0:  // memory_order_relaxed
      default: // invalid order
        break;
      case 1:  // memory_order_consume
      case 2:  // memory_order_acquire
        Builder.CreateFence(llvm::Acquire, Scope);
        break;
      case 3:  // memory_order_release
        Builder.CreateFence(llvm::Release, Scope);
        break;
      case 4:  // memory_order_acq_rel
        Builder.CreateFence(llvm::AcquireRelease, Scope);
        break;
      case 5:  // memory_order_seq_cst
        Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
        break;
      }
      return RValue::get(nullptr);
    }

    llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
    AcquireBB = createBasicBlock("acquire", CurFn);
    ReleaseBB = createBasicBlock("release", CurFn);
    AcqRelBB = createBasicBlock("acqrel", CurFn);
    SeqCstBB = createBasicBlock("seqcst", CurFn);
    llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

    Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
    llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);

    Builder.SetInsertPoint(AcquireBB);
    Builder.CreateFence(llvm::Acquire, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);

    Builder.SetInsertPoint(ReleaseBB);
    Builder.CreateFence(llvm::Release, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);

    Builder.SetInsertPoint(AcqRelBB);
    Builder.CreateFence(llvm::AcquireRelease, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);

    Builder.SetInsertPoint(SeqCstBB);
    Builder.CreateFence(llvm::SequentiallyConsistent, Scope);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(5), SeqCstBB);

    Builder.SetInsertPoint(ContBB);
    return RValue::get(nullptr);
  }

  // Library functions with special handling.
  case Builtin::BIsqrt:
  case Builtin::BIsqrtf:
  case Builtin::BIsqrtl: {
    // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
    // in finite- or unsafe-math mode (the intrinsic has different semantics
    // for handling negative numbers compared to the library function, so
    // -fmath-errno=0 is not enough).
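    // For example (added): with -ffast-math, "sqrt(x)" becomes
    //   %r = call double @llvm.sqrt.f64(double %x)
    // rather than a call to the libm function.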
    if (!FD->hasAttr<ConstAttr>())
      break;
    if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
          CGM.getCodeGenOpts().NoNaNsFPMath))
      break;
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = Arg0->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
    return RValue::get(Builder.CreateCall(F, Arg0));
  }

  case Builtin::BI__builtin_pow:
  case Builtin::BI__builtin_powf:
  case Builtin::BI__builtin_powl:
  case Builtin::BIpow:
  case Builtin::BIpowf:
  case Builtin::BIpowl: {
    // Transform a call to pow* into a @llvm.pow.* intrinsic call.
    if (!FD->hasAttr<ConstAttr>())
      break;
    Value *Base = EmitScalarExpr(E->getArg(0));
    Value *Exponent = EmitScalarExpr(E->getArg(1));
    llvm::Type *ArgType = Base->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
    return RValue::get(Builder.CreateCall2(F, Base, Exponent));
  }

  case Builtin::BIfma:
  case Builtin::BIfmaf:
  case Builtin::BIfmal:
  case Builtin::BI__builtin_fma:
  case Builtin::BI__builtin_fmaf:
  case Builtin::BI__builtin_fmal: {
    // Rewrite fma to intrinsic.
    Value *FirstArg = EmitScalarExpr(E->getArg(0));
    llvm::Type *ArgType = FirstArg->getType();
    Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
    return RValue::get(Builder.CreateCall3(F, FirstArg,
                                           EmitScalarExpr(E->getArg(1)),
                                           EmitScalarExpr(E->getArg(2))));
  }

  case Builtin::BI__builtin_signbit:
  case Builtin::BI__builtin_signbitf:
  case Builtin::BI__builtin_signbitl: {
    return RValue::get(
        Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
                           ConvertType(E->getType())));
  }
  case Builtin::BI__builtin_annotation: {
    llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
    llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
                                      AnnVal->getType());

    // Get the annotation string, go through casts. Sema requires this to be a
    // non-wide string literal, potentially cast, so the cast<> is safe.
    const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
    StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
    return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
  }
  case Builtin::BI__builtin_addcb:
  case Builtin::BI__builtin_addcs:
  case Builtin::BI__builtin_addc:
  case Builtin::BI__builtin_addcl:
  case Builtin::BI__builtin_addcll:
  case Builtin::BI__builtin_subcb:
  case Builtin::BI__builtin_subcs:
  case Builtin::BI__builtin_subc:
  case Builtin::BI__builtin_subcl:
  case Builtin::BI__builtin_subcll: {

    // We translate all of these builtins from expressions of the form:
    //   int x = ..., y = ..., carryin = ..., carryout, result;
    //   result = __builtin_addc(x, y, carryin, &carryout);
    //
    // to LLVM IR of the form:
    //
    //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
    //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
    //   %carry1 = extractvalue {i32, i1} %tmp1, 1
    //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
    //                                                       i32 %carryin)
    //   %result = extractvalue {i32, i1} %tmp2, 0
    //   %carry2 = extractvalue {i32, i1} %tmp2, 1
    //   %tmp3 = or i1 %carry1, %carry2
    //   %tmp4 = zext i1 %tmp3 to i32
    //   store i32 %tmp4, i32* %carryout

    // Scalarize our inputs.
    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
    std::pair<llvm::Value*, unsigned> CarryOutPtr =
      EmitPointerWithAlignment(E->getArg(3));

    // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
    llvm::Intrinsic::ID IntrinsicId;
    switch (BuiltinID) {
    default: llvm_unreachable("Unknown multiprecision builtin id.");
    case Builtin::BI__builtin_addcb:
    case Builtin::BI__builtin_addcs:
    case Builtin::BI__builtin_addc:
    case Builtin::BI__builtin_addcl:
    case Builtin::BI__builtin_addcll:
      IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
      break;
    case Builtin::BI__builtin_subcb:
    case Builtin::BI__builtin_subcs:
    case Builtin::BI__builtin_subc:
    case Builtin::BI__builtin_subcl:
    case Builtin::BI__builtin_subcll:
      IntrinsicId = llvm::Intrinsic::usub_with_overflow;
      break;
    }

    // Construct our resulting LLVM IR expression.
    llvm::Value *Carry1;
    llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              X, Y, Carry1);
    llvm::Value *Carry2;
    llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
                                              Sum1, Carryin, Carry2);
    llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
                                               X->getType());
    llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut,
                                                         CarryOutPtr.first);
    CarryOutStore->setAlignment(CarryOutPtr.second);
    return RValue::get(Sum2);
  }
  case Builtin::BI__builtin_uadd_overflow:
  case Builtin::BI__builtin_uaddl_overflow:
  case Builtin::BI__builtin_uaddll_overflow:
  case Builtin::BI__builtin_usub_overflow:
  case Builtin::BI__builtin_usubl_overflow:
  case Builtin::BI__builtin_usubll_overflow:
  case Builtin::BI__builtin_umul_overflow:
  case Builtin::BI__builtin_umull_overflow:
  case Builtin::BI__builtin_umulll_overflow:
  case Builtin::BI__builtin_sadd_overflow:
  case Builtin::BI__builtin_saddl_overflow:
  case Builtin::BI__builtin_saddll_overflow:
  case Builtin::BI__builtin_ssub_overflow:
  case Builtin::BI__builtin_ssubl_overflow:
  case Builtin::BI__builtin_ssubll_overflow:
  case Builtin::BI__builtin_smul_overflow:
  case Builtin::BI__builtin_smull_overflow:
  case Builtin::BI__builtin_smulll_overflow: {

    // We translate all of these builtins directly to the relevant llvm IR node.
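    // For example (added sketch), __builtin_sadd_overflow(x, y, &sum) becomes:
    //   %t     = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
    //   %sum   = extractvalue { i32, i1 } %t, 0
    //   %carry = extractvalue { i32, i1 } %t, 1  ; the builtin's return value
    //   store i32 %sum, i32* %out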
1508 llvm::Value *Carry1; 1509 llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId, 1510 X, Y, Carry1); 1511 llvm::Value *Carry2; 1512 llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId, 1513 Sum1, Carryin, Carry2); 1514 llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2), 1515 X->getType()); 1516 llvm::StoreInst *CarryOutStore = Builder.CreateStore(CarryOut, 1517 CarryOutPtr.first); 1518 CarryOutStore->setAlignment(CarryOutPtr.second); 1519 return RValue::get(Sum2); 1520 } 1521 case Builtin::BI__builtin_uadd_overflow: 1522 case Builtin::BI__builtin_uaddl_overflow: 1523 case Builtin::BI__builtin_uaddll_overflow: 1524 case Builtin::BI__builtin_usub_overflow: 1525 case Builtin::BI__builtin_usubl_overflow: 1526 case Builtin::BI__builtin_usubll_overflow: 1527 case Builtin::BI__builtin_umul_overflow: 1528 case Builtin::BI__builtin_umull_overflow: 1529 case Builtin::BI__builtin_umulll_overflow: 1530 case Builtin::BI__builtin_sadd_overflow: 1531 case Builtin::BI__builtin_saddl_overflow: 1532 case Builtin::BI__builtin_saddll_overflow: 1533 case Builtin::BI__builtin_ssub_overflow: 1534 case Builtin::BI__builtin_ssubl_overflow: 1535 case Builtin::BI__builtin_ssubll_overflow: 1536 case Builtin::BI__builtin_smul_overflow: 1537 case Builtin::BI__builtin_smull_overflow: 1538 case Builtin::BI__builtin_smulll_overflow: { 1539 1540 // We translate all of these builtins directly to the relevant llvm IR node. 1541 1542 // Scalarize our inputs. 1543 llvm::Value *X = EmitScalarExpr(E->getArg(0)); 1544 llvm::Value *Y = EmitScalarExpr(E->getArg(1)); 1545 std::pair<llvm::Value *, unsigned> SumOutPtr = 1546 EmitPointerWithAlignment(E->getArg(2)); 1547 1548 // Decide which of the overflow intrinsics we are lowering to: 1549 llvm::Intrinsic::ID IntrinsicId; 1550 switch (BuiltinID) { 1551 default: llvm_unreachable("Unknown security overflow builtin id."); 1552 case Builtin::BI__builtin_uadd_overflow: 1553 case Builtin::BI__builtin_uaddl_overflow: 1554 case Builtin::BI__builtin_uaddll_overflow: 1555 IntrinsicId = llvm::Intrinsic::uadd_with_overflow; 1556 break; 1557 case Builtin::BI__builtin_usub_overflow: 1558 case Builtin::BI__builtin_usubl_overflow: 1559 case Builtin::BI__builtin_usubll_overflow: 1560 IntrinsicId = llvm::Intrinsic::usub_with_overflow; 1561 break; 1562 case Builtin::BI__builtin_umul_overflow: 1563 case Builtin::BI__builtin_umull_overflow: 1564 case Builtin::BI__builtin_umulll_overflow: 1565 IntrinsicId = llvm::Intrinsic::umul_with_overflow; 1566 break; 1567 case Builtin::BI__builtin_sadd_overflow: 1568 case Builtin::BI__builtin_saddl_overflow: 1569 case Builtin::BI__builtin_saddll_overflow: 1570 IntrinsicId = llvm::Intrinsic::sadd_with_overflow; 1571 break; 1572 case Builtin::BI__builtin_ssub_overflow: 1573 case Builtin::BI__builtin_ssubl_overflow: 1574 case Builtin::BI__builtin_ssubll_overflow: 1575 IntrinsicId = llvm::Intrinsic::ssub_with_overflow; 1576 break; 1577 case Builtin::BI__builtin_smul_overflow: 1578 case Builtin::BI__builtin_smull_overflow: 1579 case Builtin::BI__builtin_smulll_overflow: 1580 IntrinsicId = llvm::Intrinsic::smul_with_overflow; 1581 break; 1582 } 1583 1584 1585 llvm::Value *Carry; 1586 llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); 1587 llvm::StoreInst *SumOutStore = Builder.CreateStore(Sum, SumOutPtr.first); 1588 SumOutStore->setAlignment(SumOutPtr.second); 1589 1590 return RValue::get(Carry); 1591 } 1592 case Builtin::BI__builtin_addressof: 1593 return 
RValue::get(EmitLValue(E->getArg(0)).getAddress()); 1594 case Builtin::BI__builtin_operator_new: 1595 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1596 E->getArg(0), false); 1597 case Builtin::BI__builtin_operator_delete: 1598 return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(), 1599 E->getArg(0), true); 1600 case Builtin::BI__noop: 1601 // __noop always evaluates to an integer literal zero. 1602 return RValue::get(ConstantInt::get(IntTy, 0)); 1603 case Builtin::BI__builtin_call_with_static_chain: { 1604 const CallExpr *Call = cast<CallExpr>(E->getArg(0)); 1605 const Expr *Chain = E->getArg(1); 1606 return EmitCall(Call->getCallee()->getType(), 1607 EmitScalarExpr(Call->getCallee()), Call, ReturnValue, 1608 Call->getCalleeDecl(), EmitScalarExpr(Chain)); 1609 } 1610 case Builtin::BI_InterlockedExchange: 1611 case Builtin::BI_InterlockedExchangePointer: 1612 return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E); 1613 case Builtin::BI_InterlockedCompareExchangePointer: { 1614 llvm::Type *RTy; 1615 llvm::IntegerType *IntType = 1616 IntegerType::get(getLLVMContext(), 1617 getContext().getTypeSize(E->getType())); 1618 llvm::Type *IntPtrType = IntType->getPointerTo(); 1619 1620 llvm::Value *Destination = 1621 Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType); 1622 1623 llvm::Value *Exchange = EmitScalarExpr(E->getArg(1)); 1624 RTy = Exchange->getType(); 1625 Exchange = Builder.CreatePtrToInt(Exchange, IntType); 1626 1627 llvm::Value *Comparand = 1628 Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType); 1629 1630 auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange, 1631 SequentiallyConsistent, 1632 SequentiallyConsistent); 1633 Result->setVolatile(true); 1634 1635 return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result, 1636 0), 1637 RTy)); 1638 } 1639 case Builtin::BI_InterlockedCompareExchange: { 1640 AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg( 1641 EmitScalarExpr(E->getArg(0)), 1642 EmitScalarExpr(E->getArg(2)), 1643 EmitScalarExpr(E->getArg(1)), 1644 SequentiallyConsistent, 1645 SequentiallyConsistent); 1646 CXI->setVolatile(true); 1647 return RValue::get(Builder.CreateExtractValue(CXI, 0)); 1648 } 1649 case Builtin::BI_InterlockedIncrement: { 1650 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1651 AtomicRMWInst::Add, 1652 EmitScalarExpr(E->getArg(0)), 1653 ConstantInt::get(Int32Ty, 1), 1654 llvm::SequentiallyConsistent); 1655 RMWI->setVolatile(true); 1656 return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(Int32Ty, 1))); 1657 } 1658 case Builtin::BI_InterlockedDecrement: { 1659 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1660 AtomicRMWInst::Sub, 1661 EmitScalarExpr(E->getArg(0)), 1662 ConstantInt::get(Int32Ty, 1), 1663 llvm::SequentiallyConsistent); 1664 RMWI->setVolatile(true); 1665 return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(Int32Ty, 1))); 1666 } 1667 case Builtin::BI_InterlockedExchangeAdd: { 1668 AtomicRMWInst *RMWI = Builder.CreateAtomicRMW( 1669 AtomicRMWInst::Add, 1670 EmitScalarExpr(E->getArg(0)), 1671 EmitScalarExpr(E->getArg(1)), 1672 llvm::SequentiallyConsistent); 1673 RMWI->setVolatile(true); 1674 return RValue::get(RMWI); 1675 } 1676 case Builtin::BI__readfsdword: { 1677 Value *IntToPtr = 1678 Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)), 1679 llvm::PointerType::get(CGM.Int32Ty, 257)); 1680 LoadInst *Load = 1681 Builder.CreateAlignedLoad(IntToPtr, /*Align=*/4, /*isVolatile=*/true); 1682 return 
RValue::get(Load); 1683 } 1684 1685 case Builtin::BI__exception_code: 1686 case Builtin::BI_exception_code: 1687 return RValue::get(EmitSEHExceptionCode()); 1688 case Builtin::BI__exception_info: 1689 case Builtin::BI_exception_info: 1690 return RValue::get(EmitSEHExceptionInfo()); 1691 case Builtin::BI__abnormal_termination: 1692 case Builtin::BI_abnormal_termination: 1693 return RValue::get(EmitSEHAbnormalTermination()); 1694 case Builtin::BI_setjmpex: { 1695 if (getTarget().getTriple().isOSMSVCRT()) { 1696 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 1697 llvm::AttributeSet ReturnsTwiceAttr = 1698 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 1699 llvm::Attribute::ReturnsTwice); 1700 llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction( 1701 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 1702 "_setjmpex", ReturnsTwiceAttr); 1703 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 1704 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 1705 llvm::Value *FrameAddr = 1706 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1707 ConstantInt::get(Int32Ty, 0)); 1708 llvm::Value *Args[] = {Buf, FrameAddr}; 1709 llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args); 1710 CS.setAttributes(ReturnsTwiceAttr); 1711 return RValue::get(CS.getInstruction()); 1712 } 1713 break; 1714 } 1715 case Builtin::BI_setjmp: { 1716 if (getTarget().getTriple().isOSMSVCRT()) { 1717 llvm::AttributeSet ReturnsTwiceAttr = 1718 AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex, 1719 llvm::Attribute::ReturnsTwice); 1720 llvm::Value *Buf = Builder.CreateBitOrPointerCast( 1721 EmitScalarExpr(E->getArg(0)), Int8PtrTy); 1722 llvm::CallSite CS; 1723 if (getTarget().getTriple().getArch() == llvm::Triple::x86) { 1724 llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy}; 1725 llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction( 1726 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true), 1727 "_setjmp3", ReturnsTwiceAttr); 1728 llvm::Value *Count = ConstantInt::get(IntTy, 0); 1729 llvm::Value *Args[] = {Buf, Count}; 1730 CS = EmitRuntimeCallOrInvoke(SetJmp3, Args); 1731 } else { 1732 llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy}; 1733 llvm::Constant *SetJmp = CGM.CreateRuntimeFunction( 1734 llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false), 1735 "_setjmp", ReturnsTwiceAttr); 1736 llvm::Value *FrameAddr = 1737 Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress), 1738 ConstantInt::get(Int32Ty, 0)); 1739 llvm::Value *Args[] = {Buf, FrameAddr}; 1740 CS = EmitRuntimeCallOrInvoke(SetJmp, Args); 1741 } 1742 CS.setAttributes(ReturnsTwiceAttr); 1743 return RValue::get(CS.getInstruction()); 1744 } 1745 break; 1746 } 1747 1748 case Builtin::BI__GetExceptionInfo: { 1749 if (llvm::GlobalVariable *GV = 1750 CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType())) 1751 return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy)); 1752 break; 1753 } 1754 } 1755 1756 // If this is an alias for a lib function (e.g. __builtin_sin), emit 1757 // the call using the normal call path, but using the unmangled 1758 // version of the function name. 1759 if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) 1760 return emitLibraryCall(*this, FD, E, 1761 CGM.getBuiltinLibFunction(FD, BuiltinID)); 1762 1763 // If this is a predefined lib function (e.g. malloc), emit the call 1764 // using exactly the normal call path. 
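// For example (illustrative): a call to __builtin_sinf is emitted as a call
// to the library function sinf, while a direct call such as malloc(n) goes
// through the normal call path unchanged.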
1765 if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
1766 return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
1767
1768 // See if we have a target-specific intrinsic.
1769 const char *Name = getContext().BuiltinInfo.GetName(BuiltinID);
1770 Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
1771 if (const char *Prefix =
1772 llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
1773 IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
1774 // NOTE: we don't need to perform a compatibility flag check here, since
1775 // these intrinsics are declared in Builtins*.def via LANGBUILTIN, which
1776 // filters the MS builtins via ALL_MS_LANGUAGES, so they are filtered earlier.
1777 if (IntrinsicID == Intrinsic::not_intrinsic)
1778 IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
1779 }
1780
1781 if (IntrinsicID != Intrinsic::not_intrinsic) {
1782 SmallVector<Value*, 16> Args;
1783
1784 // Find out if any arguments are required to be integer constant
1785 // expressions.
1786 unsigned ICEArguments = 0;
1787 ASTContext::GetBuiltinTypeError Error;
1788 getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
1789 assert(Error == ASTContext::GE_None && "Should not codegen an error");
1790
1791 Function *F = CGM.getIntrinsic(IntrinsicID);
1792 llvm::FunctionType *FTy = F->getFunctionType();
1793
1794 for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
1795 Value *ArgValue;
1796 // If this is a normal argument, just emit it as a scalar.
1797 if ((ICEArguments & (1 << i)) == 0) {
1798 ArgValue = EmitScalarExpr(E->getArg(i));
1799 } else {
1800 // If this is required to be a constant, constant fold it so that we
1801 // know that the generated intrinsic gets a ConstantInt.
1802 llvm::APSInt Result;
1803 bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
1804 assert(IsConst && "Constant arg isn't actually constant?");
1805 (void)IsConst;
1806 ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
1807 }
1808
1809 // If the intrinsic arg type is different from the builtin arg type,
1810 // we need to do a bit cast.
1811 llvm::Type *PTy = FTy->getParamType(i);
1812 if (PTy != ArgValue->getType()) {
1813 assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
1814 "Must be able to losslessly bit cast to param");
1815 ArgValue = Builder.CreateBitCast(ArgValue, PTy);
1816 }
1817
1818 Args.push_back(ArgValue);
1819 }
1820
1821 Value *V = Builder.CreateCall(F, Args);
1822 QualType BuiltinRetType = E->getType();
1823
1824 llvm::Type *RetTy = VoidTy;
1825 if (!BuiltinRetType->isVoidType())
1826 RetTy = ConvertType(BuiltinRetType);
1827
1828 if (RetTy != V->getType()) {
1829 assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
1830 "Must be able to losslessly bit cast result type");
1831 V = Builder.CreateBitCast(V, RetTy);
1832 }
1833
1834 return RValue::get(V);
1835 }
1836
1837 // See if we have a target-specific builtin that needs to be lowered.
1838 if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
1839 return RValue::get(V);
1840
1841 ErrorUnsupported(E, "builtin function");
1842
1843 // Unknown builtin, for now just dump it out and return undef.
1844 return GetUndefRValue(E->getType());
1845 }
1846
1847 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
1848 const CallExpr *E) {
1849 switch (getTarget().getTriple().getArch()) {
1850 case llvm::Triple::arm:
1851 case llvm::Triple::armeb:
1852 case llvm::Triple::thumb:
1853 case llvm::Triple::thumbeb:
1854 return EmitARMBuiltinExpr(BuiltinID, E);
1855 case llvm::Triple::aarch64:
1856 case llvm::Triple::aarch64_be:
1857 return EmitAArch64BuiltinExpr(BuiltinID, E);
1858 case llvm::Triple::x86:
1859 case llvm::Triple::x86_64:
1860 return EmitX86BuiltinExpr(BuiltinID, E);
1861 case llvm::Triple::ppc:
1862 case llvm::Triple::ppc64:
1863 case llvm::Triple::ppc64le:
1864 return EmitPPCBuiltinExpr(BuiltinID, E);
1865 case llvm::Triple::r600:
1866 case llvm::Triple::amdgcn:
1867 return EmitR600BuiltinExpr(BuiltinID, E);
1868 case llvm::Triple::systemz:
1869 return EmitSystemZBuiltinExpr(BuiltinID, E);
1870 default:
1871 return nullptr;
1872 }
1873 }
1874
1875 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
1876 NeonTypeFlags TypeFlags,
1877 bool V1Ty=false) {
1878 int IsQuad = TypeFlags.isQuad();
1879 switch (TypeFlags.getEltType()) {
1880 case NeonTypeFlags::Int8:
1881 case NeonTypeFlags::Poly8:
1882 return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
1883 case NeonTypeFlags::Int16:
1884 case NeonTypeFlags::Poly16:
1885 case NeonTypeFlags::Float16:
1886 return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
1887 case NeonTypeFlags::Int32:
1888 return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
1889 case NeonTypeFlags::Int64:
1890 case NeonTypeFlags::Poly64:
1891 return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
1892 case NeonTypeFlags::Poly128:
1893 // FIXME: i128 and f128 are not fully supported in Clang and LLVM;
1894 // a lot of the i128 and f128 API is missing,
1895 // so we use v16i8 to represent poly128 and rely on pattern matching.
1896 return llvm::VectorType::get(CGF->Int8Ty, 16);
1897 case NeonTypeFlags::Float32:
1898 return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
1899 case NeonTypeFlags::Float64:
1900 return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
1901 }
1902 llvm_unreachable("Unknown vector element type!");
1903 }
1904
1905 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
1906 unsigned nElts = cast<llvm::VectorType>(V->getType())->getNumElements();
1907 Value* SV = llvm::ConstantVector::getSplat(nElts, C);
1908 return Builder.CreateShuffleVector(V, V, SV, "lane");
1909 }
1910
1911 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
1912 const char *name,
1913 unsigned shift, bool rightshift) {
1914 unsigned j = 0;
1915 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
1916 ai != ae; ++ai, ++j)
1917 if (shift > 0 && shift == j)
1918 Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
1919 else
1920 Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
1921
1922 return Builder.CreateCall(F, Ops, name);
1923 }
1924
1925 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
1926 bool neg) {
1927 int SV = cast<ConstantInt>(V)->getSExtValue();
1928
1929 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1930 llvm::Constant *C = ConstantInt::get(VTy->getElementType(), neg ? -SV : SV);
1931 return llvm::ConstantVector::getSplat(VTy->getNumElements(), C);
1932 }
1933
1934 /// \brief Right-shift a vector by a constant.
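/// For example (an illustrative sketch of the shift-amount == element-size
/// special case handled below), for a <4 x i32> vector:
///   vshrq_n_s32(v, 32) -> ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
///   vshrq_n_u32(v, 32) -> zeroinitializer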
1935 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
1936 llvm::Type *Ty, bool usgn,
1937 const char *name) {
1938 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
1939
1940 int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
1941 int EltSize = VTy->getScalarSizeInBits();
1942
1943 Vec = Builder.CreateBitCast(Vec, Ty);
1944
1945 // lshr/ashr are undefined when the shift amount is equal to the vector
1946 // element size.
1947 if (ShiftAmt == EltSize) {
1948 if (usgn) {
1949 // Right-shifting an unsigned value by its size yields 0.
1950 llvm::Constant *Zero = ConstantInt::get(VTy->getElementType(), 0);
1951 return llvm::ConstantVector::getSplat(VTy->getNumElements(), Zero);
1952 } else {
1953 // Right-shifting a signed value by its size is equivalent
1954 // to a shift of size-1.
1955 --ShiftAmt;
1956 Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
1957 }
1958 }
1959
1960 Shift = EmitNeonShiftVector(Shift, Ty, false);
1961 if (usgn)
1962 return Builder.CreateLShr(Vec, Shift, name);
1963 else
1964 return Builder.CreateAShr(Vec, Shift, name);
1965 }
1966
1967 /// EmitPointerWithAlignment - Given an expression with a pointer type, find
1968 /// the alignment of the type referenced by the pointer. Skip over implicit
1969 /// casts.
1970 std::pair<llvm::Value*, unsigned>
1971 CodeGenFunction::EmitPointerWithAlignment(const Expr *Addr) {
1972 assert(Addr->getType()->isPointerType());
1973 Addr = Addr->IgnoreParens();
1974 if (const ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Addr)) {
1975 if ((ICE->getCastKind() == CK_BitCast || ICE->getCastKind() == CK_NoOp) &&
1976 ICE->getSubExpr()->getType()->isPointerType()) {
1977 std::pair<llvm::Value*, unsigned> Ptr =
1978 EmitPointerWithAlignment(ICE->getSubExpr());
1979 Ptr.first = Builder.CreateBitCast(Ptr.first,
1980 ConvertType(Addr->getType()));
1981 return Ptr;
1982 } else if (ICE->getCastKind() == CK_ArrayToPointerDecay) {
1983 LValue LV = EmitLValue(ICE->getSubExpr());
1984 unsigned Align = LV.getAlignment().getQuantity();
1985 if (!Align) {
1986 // FIXME: Once LValues are fixed to always set alignment,
1987 // zap this code.
1988 QualType PtTy = ICE->getSubExpr()->getType();
1989 if (!PtTy->isIncompleteType())
1990 Align = getContext().getTypeAlignInChars(PtTy).getQuantity();
1991 else
1992 Align = 1;
1993 }
1994 return std::make_pair(LV.getAddress(), Align);
1995 }
1996 }
1997 if (const UnaryOperator *UO = dyn_cast<UnaryOperator>(Addr)) {
1998 if (UO->getOpcode() == UO_AddrOf) {
1999 LValue LV = EmitLValue(UO->getSubExpr());
2000 unsigned Align = LV.getAlignment().getQuantity();
2001 if (!Align) {
2002 // FIXME: Once LValues are fixed to always set alignment,
2003 // zap this code.
2004 QualType PtTy = UO->getSubExpr()->getType(); 2005 if (!PtTy->isIncompleteType()) 2006 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 2007 else 2008 Align = 1; 2009 } 2010 return std::make_pair(LV.getAddress(), Align); 2011 } 2012 } 2013 2014 unsigned Align = 1; 2015 QualType PtTy = Addr->getType()->getPointeeType(); 2016 if (!PtTy->isIncompleteType()) 2017 Align = getContext().getTypeAlignInChars(PtTy).getQuantity(); 2018 2019 return std::make_pair(EmitScalarExpr(Addr), Align); 2020 } 2021 2022 enum { 2023 AddRetType = (1 << 0), 2024 Add1ArgType = (1 << 1), 2025 Add2ArgTypes = (1 << 2), 2026 2027 VectorizeRetType = (1 << 3), 2028 VectorizeArgTypes = (1 << 4), 2029 2030 InventFloatType = (1 << 5), 2031 UnsignedAlts = (1 << 6), 2032 2033 Use64BitVectors = (1 << 7), 2034 Use128BitVectors = (1 << 8), 2035 2036 Vectorize1ArgType = Add1ArgType | VectorizeArgTypes, 2037 VectorRet = AddRetType | VectorizeRetType, 2038 VectorRetGetArgs01 = 2039 AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes, 2040 FpCmpzModifiers = 2041 AddRetType | VectorizeRetType | Add1ArgType | InventFloatType 2042 }; 2043 2044 struct NeonIntrinsicInfo { 2045 unsigned BuiltinID; 2046 unsigned LLVMIntrinsic; 2047 unsigned AltLLVMIntrinsic; 2048 const char *NameHint; 2049 unsigned TypeModifier; 2050 2051 bool operator<(unsigned RHSBuiltinID) const { 2052 return BuiltinID < RHSBuiltinID; 2053 } 2054 }; 2055 2056 #define NEONMAP0(NameBase) \ 2057 { NEON::BI__builtin_neon_ ## NameBase, 0, 0, #NameBase, 0 } 2058 2059 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ 2060 { NEON:: BI__builtin_neon_ ## NameBase, \ 2061 Intrinsic::LLVMIntrinsic, 0, #NameBase, TypeModifier } 2062 2063 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \ 2064 { NEON:: BI__builtin_neon_ ## NameBase, \ 2065 Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \ 2066 #NameBase, TypeModifier } 2067 2068 static NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = { 2069 NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2070 NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts), 2071 NEONMAP1(vabs_v, arm_neon_vabs, 0), 2072 NEONMAP1(vabsq_v, arm_neon_vabs, 0), 2073 NEONMAP0(vaddhn_v), 2074 NEONMAP1(vaesdq_v, arm_neon_aesd, 0), 2075 NEONMAP1(vaeseq_v, arm_neon_aese, 0), 2076 NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0), 2077 NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0), 2078 NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType), 2079 NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType), 2080 NEONMAP1(vcage_v, arm_neon_vacge, 0), 2081 NEONMAP1(vcageq_v, arm_neon_vacge, 0), 2082 NEONMAP1(vcagt_v, arm_neon_vacgt, 0), 2083 NEONMAP1(vcagtq_v, arm_neon_vacgt, 0), 2084 NEONMAP1(vcale_v, arm_neon_vacge, 0), 2085 NEONMAP1(vcaleq_v, arm_neon_vacge, 0), 2086 NEONMAP1(vcalt_v, arm_neon_vacgt, 0), 2087 NEONMAP1(vcaltq_v, arm_neon_vacgt, 0), 2088 NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType), 2089 NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType), 2090 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2091 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2092 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2093 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2094 NEONMAP1(vcvt_f16_v, arm_neon_vcvtfp2hf, 0), 2095 NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0), 2096 NEONMAP0(vcvt_f32_v), 2097 NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2098 NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2099 NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2100 NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2101 NEONMAP1(vcvt_n_u64_v, 
arm_neon_vcvtfp2fxu, 0), 2102 NEONMAP0(vcvt_s32_v), 2103 NEONMAP0(vcvt_s64_v), 2104 NEONMAP0(vcvt_u32_v), 2105 NEONMAP0(vcvt_u64_v), 2106 NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0), 2107 NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0), 2108 NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0), 2109 NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0), 2110 NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0), 2111 NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0), 2112 NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0), 2113 NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0), 2114 NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0), 2115 NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0), 2116 NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0), 2117 NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0), 2118 NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0), 2119 NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0), 2120 NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0), 2121 NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0), 2122 NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0), 2123 NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0), 2124 NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0), 2125 NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0), 2126 NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0), 2127 NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0), 2128 NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0), 2129 NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0), 2130 NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0), 2131 NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0), 2132 NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0), 2133 NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0), 2134 NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0), 2135 NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0), 2136 NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0), 2137 NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0), 2138 NEONMAP0(vcvtq_f32_v), 2139 NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0), 2140 NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0), 2141 NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0), 2142 NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0), 2143 NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0), 2144 NEONMAP0(vcvtq_s32_v), 2145 NEONMAP0(vcvtq_s64_v), 2146 NEONMAP0(vcvtq_u32_v), 2147 NEONMAP0(vcvtq_u64_v), 2148 NEONMAP0(vext_v), 2149 NEONMAP0(vextq_v), 2150 NEONMAP0(vfma_v), 2151 NEONMAP0(vfmaq_v), 2152 NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2153 NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts), 2154 NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2155 NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts), 2156 NEONMAP0(vld1_dup_v), 2157 NEONMAP1(vld1_v, arm_neon_vld1, 0), 2158 NEONMAP0(vld1q_dup_v), 2159 NEONMAP1(vld1q_v, arm_neon_vld1, 0), 2160 NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0), 2161 NEONMAP1(vld2_v, arm_neon_vld2, 0), 2162 NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0), 2163 NEONMAP1(vld2q_v, arm_neon_vld2, 0), 2164 NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0), 2165 NEONMAP1(vld3_v, arm_neon_vld3, 0), 2166 NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0), 2167 NEONMAP1(vld3q_v, arm_neon_vld3, 0), 2168 NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0), 2169 NEONMAP1(vld4_v, arm_neon_vld4, 0), 2170 NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0), 2171 NEONMAP1(vld4q_v, arm_neon_vld4, 0), 2172 NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2173 NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType), 2174 NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType), 2175 NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts), 2176 NEONMAP2(vmin_v, 
arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2177 NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType), 2178 NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType), 2179 NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts), 2180 NEONMAP0(vmovl_v), 2181 NEONMAP0(vmovn_v), 2182 NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType), 2183 NEONMAP0(vmull_v), 2184 NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType), 2185 NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2186 NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts), 2187 NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType), 2188 NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2189 NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts), 2190 NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType), 2191 NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts), 2192 NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts), 2193 NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType), 2194 NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType), 2195 NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2196 NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts), 2197 NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0), 2198 NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0), 2199 NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType), 2200 NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType), 2201 NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType), 2202 NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts), 2203 NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType), 2204 NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType), 2205 NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType), 2206 NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType), 2207 NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType), 2208 NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2209 NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts), 2210 NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2211 NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2212 NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts), 2213 NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts), 2214 NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0), 2215 NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0), 2216 NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2217 NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts), 2218 NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType), 2219 NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2220 NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0), 2221 NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType), 2222 NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType), 2223 NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2224 NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts), 2225 NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType), 2226 NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType), 2227 NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType), 2228 NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType), 2229 NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType), 2230 NEONMAP1(vrndn_v, arm_neon_vrintn, 
Add1ArgType), 2231 NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType), 2232 NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType), 2233 NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType), 2234 NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType), 2235 NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType), 2236 NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType), 2237 NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2238 NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts), 2239 NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2240 NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts), 2241 NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2242 NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0), 2243 NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType), 2244 NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType), 2245 NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType), 2246 NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0), 2247 NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0), 2248 NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0), 2249 NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0), 2250 NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0), 2251 NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0), 2252 NEONMAP0(vshl_n_v), 2253 NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2254 NEONMAP0(vshll_n_v), 2255 NEONMAP0(vshlq_n_v), 2256 NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts), 2257 NEONMAP0(vshr_n_v), 2258 NEONMAP0(vshrn_n_v), 2259 NEONMAP0(vshrq_n_v), 2260 NEONMAP1(vst1_v, arm_neon_vst1, 0), 2261 NEONMAP1(vst1q_v, arm_neon_vst1, 0), 2262 NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0), 2263 NEONMAP1(vst2_v, arm_neon_vst2, 0), 2264 NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0), 2265 NEONMAP1(vst2q_v, arm_neon_vst2, 0), 2266 NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0), 2267 NEONMAP1(vst3_v, arm_neon_vst3, 0), 2268 NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0), 2269 NEONMAP1(vst3q_v, arm_neon_vst3, 0), 2270 NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0), 2271 NEONMAP1(vst4_v, arm_neon_vst4, 0), 2272 NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0), 2273 NEONMAP1(vst4q_v, arm_neon_vst4, 0), 2274 NEONMAP0(vsubhn_v), 2275 NEONMAP0(vtrn_v), 2276 NEONMAP0(vtrnq_v), 2277 NEONMAP0(vtst_v), 2278 NEONMAP0(vtstq_v), 2279 NEONMAP0(vuzp_v), 2280 NEONMAP0(vuzpq_v), 2281 NEONMAP0(vzip_v), 2282 NEONMAP0(vzipq_v) 2283 }; 2284 2285 static NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = { 2286 NEONMAP1(vabs_v, aarch64_neon_abs, 0), 2287 NEONMAP1(vabsq_v, aarch64_neon_abs, 0), 2288 NEONMAP0(vaddhn_v), 2289 NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0), 2290 NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0), 2291 NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0), 2292 NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0), 2293 NEONMAP1(vcage_v, aarch64_neon_facge, 0), 2294 NEONMAP1(vcageq_v, aarch64_neon_facge, 0), 2295 NEONMAP1(vcagt_v, aarch64_neon_facgt, 0), 2296 NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0), 2297 NEONMAP1(vcale_v, aarch64_neon_facge, 0), 2298 NEONMAP1(vcaleq_v, aarch64_neon_facge, 0), 2299 NEONMAP1(vcalt_v, aarch64_neon_facgt, 0), 2300 NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0), 2301 NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType), 2302 NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType), 2303 NEONMAP1(vclz_v, ctlz, Add1ArgType), 2304 NEONMAP1(vclzq_v, ctlz, Add1ArgType), 2305 NEONMAP1(vcnt_v, ctpop, Add1ArgType), 2306 NEONMAP1(vcntq_v, ctpop, Add1ArgType), 2307 NEONMAP1(vcvt_f16_v, 
aarch64_neon_vcvtfp2hf, 0), 2308 NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0), 2309 NEONMAP0(vcvt_f32_v), 2310 NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2311 NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2312 NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2313 NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2314 NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2315 NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2316 NEONMAP0(vcvtq_f32_v), 2317 NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2318 NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0), 2319 NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0), 2320 NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0), 2321 NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0), 2322 NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0), 2323 NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType), 2324 NEONMAP0(vext_v), 2325 NEONMAP0(vextq_v), 2326 NEONMAP0(vfma_v), 2327 NEONMAP0(vfmaq_v), 2328 NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2329 NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts), 2330 NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2331 NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts), 2332 NEONMAP0(vmovl_v), 2333 NEONMAP0(vmovn_v), 2334 NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType), 2335 NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType), 2336 NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType), 2337 NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2338 NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts), 2339 NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType), 2340 NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType), 2341 NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType), 2342 NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2343 NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts), 2344 NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0), 2345 NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0), 2346 NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType), 2347 NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType), 2348 NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType), 2349 NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts), 2350 NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType), 2351 NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType), 2352 NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType), 2353 NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType), 2354 NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType), 2355 NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2356 NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts), 2357 NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts), 2358 NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2359 NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts), 2360 NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts), 2361 NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0), 2362 NEONMAP1(vqshluq_n_v, 
aarch64_neon_sqshlu, 0), 2363 NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2364 NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts), 2365 NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType), 2366 NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2367 NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0), 2368 NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType), 2369 NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType), 2370 NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2371 NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts), 2372 NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2373 NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts), 2374 NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2375 NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts), 2376 NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2377 NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0), 2378 NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType), 2379 NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType), 2380 NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType), 2381 NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0), 2382 NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0), 2383 NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0), 2384 NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0), 2385 NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0), 2386 NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0), 2387 NEONMAP0(vshl_n_v), 2388 NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2389 NEONMAP0(vshll_n_v), 2390 NEONMAP0(vshlq_n_v), 2391 NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts), 2392 NEONMAP0(vshr_n_v), 2393 NEONMAP0(vshrn_n_v), 2394 NEONMAP0(vshrq_n_v), 2395 NEONMAP0(vsubhn_v), 2396 NEONMAP0(vtst_v), 2397 NEONMAP0(vtstq_v), 2398 }; 2399 2400 static NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = { 2401 NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType), 2402 NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType), 2403 NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType), 2404 NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 2405 NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 2406 NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType), 2407 NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType), 2408 NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2409 NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2410 NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2411 NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType), 2412 NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType), 2413 NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType), 2414 NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType), 2415 NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2416 NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2417 NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2418 NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2419 NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | 
Add1ArgType), 2420 NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2421 NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType), 2422 NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType), 2423 NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType), 2424 NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType), 2425 NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2426 NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2427 NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType), 2428 NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType), 2429 NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2430 NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2431 NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2432 NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2433 NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2434 NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2435 NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType), 2436 NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), 2437 NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2438 NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2439 NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType), 2440 NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType), 2441 NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2442 NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2443 NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType), 2444 NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType), 2445 NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType), 2446 NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), 2447 NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), 2448 NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), 2449 NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), 2450 NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2451 NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2452 NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2453 NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2454 NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2455 NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2456 NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2457 NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2458 NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType), 2459 NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType), 2460 NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2461 NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2462 NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2463 NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2464 NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2465 NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2466 
NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2467 NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2468 NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType), 2469 NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType), 2470 NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0), 2471 NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType), 2472 NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType), 2473 NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2474 NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType), 2475 NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2476 NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), 2477 NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2478 NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType), 2479 NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2480 NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType), 2481 NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType), 2482 NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType), 2483 NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2484 NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType), 2485 NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors), 2486 NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType), 2487 NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2488 NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2489 NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType), 2490 NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType), 2491 NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors), 2492 NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors), 2493 NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType), 2494 NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType), 2495 NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors), 2496 NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType), 2497 NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors), 2498 NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0), 2499 NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType), 2500 NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType), 2501 NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2502 NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2503 NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors), 2504 NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors), 2505 NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType), 2506 NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2507 NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors), 2508 NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2509 NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType), 2510 NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors), 2511 NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType), 2512 NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors), 2513 NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType), 2514 NEONMAP1(vqrshlb_s8, 
aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2515 NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2516 NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType), 2517 NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType), 2518 NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors), 2519 NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors), 2520 NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType), 2521 NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType), 2522 NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType), 2523 NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType), 2524 NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2525 NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2526 NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors), 2527 NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors), 2528 NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType), 2529 NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2530 NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors), 2531 NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2532 NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2533 NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2534 NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2535 NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType), 2536 NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType), 2537 NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2538 NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2539 NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors), 2540 NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors), 2541 NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType), 2542 NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType), 2543 NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType), 2544 NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType), 2545 NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2546 NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors), 2547 NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType), 2548 NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType), 2549 NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType), 2550 NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2551 NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2552 NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors), 2553 NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors), 2554 NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType), 2555 NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2556 NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors), 2557 NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors), 2558 NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2559 NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType), 2560 NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType), 2561 NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, 
Vectorize1ArgType | Use64BitVectors), 2562 NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors), 2563 NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType), 2564 NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType), 2565 NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType), 2566 NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType), 2567 NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType), 2568 NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType), 2569 NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType), 2570 NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType), 2571 NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType), 2572 NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType), 2573 NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType), 2574 NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType), 2575 NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0), 2576 NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0), 2577 NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0), 2578 NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0), 2579 NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType), 2580 NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType), 2581 NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType), 2582 NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType), 2583 NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2584 NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType), 2585 NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors), 2586 NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType), 2587 NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType), 2588 NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType), 2589 NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2590 NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType), 2591 NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors), 2592 NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType), 2593 }; 2594 2595 #undef NEONMAP0 2596 #undef NEONMAP1 2597 #undef NEONMAP2 2598 2599 static bool NEONSIMDIntrinsicsProvenSorted = false; 2600 2601 static bool AArch64SIMDIntrinsicsProvenSorted = false; 2602 static bool AArch64SISDIntrinsicsProvenSorted = false; 2603 2604 2605 static const NeonIntrinsicInfo * 2606 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap, 2607 unsigned BuiltinID, bool &MapProvenSorted) { 2608 2609 #ifndef NDEBUG 2610 if (!MapProvenSorted) { 2611 // FIXME: use std::is_sorted once C++11 is allowed 2612 for (unsigned i = 0; i < IntrinsicMap.size() - 1; ++i) 2613 assert(IntrinsicMap[i].BuiltinID <= IntrinsicMap[i + 1].BuiltinID); 2614 MapProvenSorted = true; 2615 } 2616 #endif 2617 2618 const NeonIntrinsicInfo *Builtin = 2619 std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID); 2620 2621 if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID) 2622 return Builtin; 2623 2624 return nullptr; 2625 } 2626 2627 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID, 2628 unsigned Modifier, 2629 llvm::Type *ArgType, 2630 const CallExpr *E) { 2631 int VectorSize = 0; 2632 if (Modifier & Use64BitVectors) 2633 VectorSize = 64; 2634 else if (Modifier & Use128BitVectors) 2635 VectorSize = 128; 2636 2637 // Return type. 
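// As an illustrative sketch of how the modifier flags drive the overload:
// vaddlv_s32 is mapped above with AddRetType | Add1ArgType, so the code below
// collects the i64 return type and the <2 x i32> argument type, selecting
// @llvm.aarch64.neon.saddlv.i64.v2i32.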
2638 SmallVector<llvm::Type *, 3> Tys;
2639 if (Modifier & AddRetType) {
2640 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
2641 if (Modifier & VectorizeRetType)
2642 Ty = llvm::VectorType::get(
2643 Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
2644
2645 Tys.push_back(Ty);
2646 }
2647
2648 // Arguments.
2649 if (Modifier & VectorizeArgTypes) {
2650 int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
2651 ArgType = llvm::VectorType::get(ArgType, Elts);
2652 }
2653
2654 if (Modifier & (Add1ArgType | Add2ArgTypes))
2655 Tys.push_back(ArgType);
2656
2657 if (Modifier & Add2ArgTypes)
2658 Tys.push_back(ArgType);
2659
2660 if (Modifier & InventFloatType)
2661 Tys.push_back(FloatTy);
2662
2663 return CGM.getIntrinsic(IntrinsicID, Tys);
2664 }
2665
2666 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
2667 const NeonIntrinsicInfo &SISDInfo,
2668 SmallVectorImpl<Value *> &Ops,
2669 const CallExpr *E) {
2670 unsigned BuiltinID = SISDInfo.BuiltinID;
2671 unsigned int Int = SISDInfo.LLVMIntrinsic;
2672 unsigned Modifier = SISDInfo.TypeModifier;
2673 const char *s = SISDInfo.NameHint;
2674
2675 switch (BuiltinID) {
2676 case NEON::BI__builtin_neon_vcled_s64:
2677 case NEON::BI__builtin_neon_vcled_u64:
2678 case NEON::BI__builtin_neon_vcles_f32:
2679 case NEON::BI__builtin_neon_vcled_f64:
2680 case NEON::BI__builtin_neon_vcltd_s64:
2681 case NEON::BI__builtin_neon_vcltd_u64:
2682 case NEON::BI__builtin_neon_vclts_f32:
2683 case NEON::BI__builtin_neon_vcltd_f64:
2684 case NEON::BI__builtin_neon_vcales_f32:
2685 case NEON::BI__builtin_neon_vcaled_f64:
2686 case NEON::BI__builtin_neon_vcalts_f32:
2687 case NEON::BI__builtin_neon_vcaltd_f64:
2688 // Only one direction of comparisons actually exists: cmle is actually a
2689 // cmge with swapped operands. The table gives us the right intrinsic, but
2690 // we still need to do the swap.
2691 std::swap(Ops[0], Ops[1]);
2692 break;
2693 }
2694
2695 assert(Int && "Generic code assumes a valid intrinsic");
2696
2697 // Determine the type(s) of this overloaded AArch64 intrinsic.
2698 const Expr *Arg = E->getArg(0);
2699 llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
2700 Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
2701
2702 int j = 0;
2703 ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
2704 for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2705 ai != ae; ++ai, ++j) {
2706 llvm::Type *ArgTy = ai->getType();
2707 if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
2708 ArgTy->getPrimitiveSizeInBits())
2709 continue;
2710
2711 assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
2712 // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
2713 // it before inserting.
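// For example (illustrative): vqaddb_s8 maps to aarch64_neon_sqadd with
// Vectorize1ArgType | Use64BitVectors, so each i8 operand is inserted below
// into lane 0 of an undef <8 x i8>, and the scalar result is extracted from
// lane 0 again after the call.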
    Ops[j] =
        CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
    Ops[j] =
        CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
  }

  Value *Result = CGF.EmitNeonCall(F, Ops, s);
  llvm::Type *ResultType = CGF.ConvertType(E->getType());
  if (ResultType->getPrimitiveSizeInBits() <
      Result->getType()->getPrimitiveSizeInBits())
    return CGF.Builder.CreateExtractElement(Result, C0);

  return CGF.Builder.CreateBitCast(Result, ResultType, s);
}

Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, llvm::Value *Align) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
                                          SrcTy->getScalarSizeInBits() / 2);
    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    std::swap(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate a target-independent intrinsic, which needs a second
    // argument for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    bool Double =
        (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    llvm::Type *Tys[2] = { FloatTy, Ty };
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    bool Double =
        (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    llvm::Type *Tys[2] = { Ty, FloatTy };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    bool Double =
        (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    llvm::Type *FloatTy =
        GetNeonType(this, NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                               : NeonTypeFlags::Float32,
                                        false, Quad));
    Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
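    // e.g. (illustrative) the signed conversion on a 64-bit vector is just
    //   %vcvt = fptosi <2 x float> %a to <2 x i32>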
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    bool Double =
        (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64);
    llvm::Type *InTy =
        GetNeonType(this,
                    NeonTypeFlags(Double ? NeonTypeFlags::Float64
                                         : NeonTypeFlags::Float32, false, Quad));
    llvm::Type *Tys[2] = { Ty, InTy };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<Constant*, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(ConstantInt::get(Int32Ty, i+CV));

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Value *SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // The NEON intrinsic puts the accumulator first, unlike the LLVM fma.
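    // That is, vfma(a, b, c) computes a + b*c, so it becomes
    // @llvm.fma(b, c, a) -- an illustrative reading of the call below.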
    return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vld1");
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    Ops[1] = Builder.CreateCall2(F, Ops[1], Align, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Ty);
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(Align);
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
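    // e.g. (illustrative) vpadal_s8 accumulates into <4 x i16> from an
    // <8 x i8> source, so the intrinsic is overloaded on { <4 x i16>, <8 x i8> }.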
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
        llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
        llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
        llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Value *Mul = EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty),
                              MulOps, "vqdmlal");

    SmallVector<Value *, 2> AccumOps;
    AccumOps.push_back(Ops[0]);
    AccumOps.push_back(Mul);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty),
                        AccumOps, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0], Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v:
    Ops.push_back(Align);
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "");
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
                                          SrcTy->getScalarSizeInBits() / 2);
    ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(Builder.getInt32(i+vi));
        Indices.push_back(Builder.getInt32(i+e+vi));
      }
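      // e.g. for <4 x i16> this builds the masks <0,4,2,6> (vi == 0) and
      // <1,5,3,7> (vi == 1) -- an illustrative instance of the loop above.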
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  }

  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // Cast the one-element vector result of an AArch64 intrinsic back to the
  // scalar type expected by the builtin.
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}

Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
    Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
    const CmpInst::Predicate Ip, const Twine &Name) {
  llvm::Type *OTy = Op->getType();

  // FIXME: this is utterly horrific. We should not be looking at previous
  // codegen context to find out what needs doing. Unfortunately TableGen
  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
  // (etc).
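  // Illustratively, vceqz_f32(%a) comes out roughly as:
  //   %cmp = fcmp oeq <2 x float> %a, zeroinitializer
  //   %res = sext <2 x i1> %cmp to <2 x i32>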
  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
    OTy = BI->getOperand(0)->getType();

  Op = Builder.CreateBitCast(Op, OTy);
  if (OTy->getScalarType()->isFloatingPointTy()) {
    Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
  } else {
    Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
  }
  return Builder.CreateSExt(Op, Ty, Name);
}

static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
                                 Value *ExtOp, Value *IndexOp,
                                 llvm::Type *ResTy, unsigned IntID,
                                 const char *Name) {
  SmallVector<Value *, 2> TblOps;
  if (ExtOp)
    TblOps.push_back(ExtOp);

  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
  SmallVector<Constant*, 16> Indices;
  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
    Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i));
    Indices.push_back(ConstantInt::get(CGF.Int32Ty, 2*i+1));
  }
  Value *SV = llvm::ConstantVector::get(Indices);

  int PairPos = 0, End = Ops.size() - 1;
  while (PairPos < End) {
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     Ops[PairPos+1], SV, Name));
    PairPos += 2;
  }

  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
  // of the last 128-bit lookup table with zeros.
  if (PairPos == End) {
    Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
    TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
                                                     ZeroTbl, SV, Name));
  }

  Function *TblF;
  TblOps.push_back(IndexOp);
  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);

  return CGF.EmitNeonCall(TblF, TblOps, Name);
}

Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
  switch (BuiltinID) {
  default:
    return nullptr;
  case ARM::BI__builtin_arm_nop:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 0));
  case ARM::BI__builtin_arm_yield:
  case ARM::BI__yield:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 1));
  case ARM::BI__builtin_arm_wfe:
  case ARM::BI__wfe:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 2));
  case ARM::BI__builtin_arm_wfi:
  case ARM::BI__wfi:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 3));
  case ARM::BI__builtin_arm_sev:
  case ARM::BI__sev:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 4));
  case ARM::BI__builtin_arm_sevl:
  case ARM::BI__sevl:
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
                              llvm::ConstantInt::get(Int32Ty, 5));
  }
}

Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  if (auto Hint = GetValueForARMHint(BuiltinID))
    return Hint;

  if (BuiltinID == ARM::BI__emit) {
    bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
    llvm::FunctionType *FTy =
        llvm::FunctionType::get(VoidTy, /*Variadic=*/false);

    APSInt Value;
    if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
      llvm_unreachable("Sema will ensure that the parameter is constant");
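    // e.g. (illustrative) __emit(0x1234) in Thumb mode becomes the inline
    // asm ".inst.n 0x1234"; the constant is emitted literally as an encoding.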
    uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();

    llvm::InlineAsm *Emit =
        IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true)
                : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
                                 /*SideEffects=*/true);

    return Builder.CreateCall(Emit);
  }

  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
    Value *Option = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
  }

  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = EmitScalarExpr(E->getArg(1));
    Value *IsData = EmitScalarExpr(E->getArg(2));

    // Locality is not supported on the ARM target.
    Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);

    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
  }

  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
                              EmitScalarExpr(E->getArg(0)),
                              "rbit");
  }

  if (BuiltinID == ARM::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < 2; i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
      ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
        BuiltinID == ARM::BI__builtin_arm_ldaex) &&
       getContext().getTypeSize(E->getType()) == 64) ||
      BuiltinID == ARM::BI__ldrexd) {
    Function *F;

    switch (BuiltinID) {
    default: llvm_unreachable("unexpected builtin");
    case ARM::BI__builtin_arm_ldaex:
      F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
      break;
    case ARM::BI__builtin_arm_ldrexd:
    case ARM::BI__builtin_arm_ldrex:
    case ARM::BI__ldrexd:
      F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
      break;
    }

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldrexd");

    // The result comes back as two i32s; reassemble the i64 with the word in
    // element 1 shifted into the top 32 bits.
    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    Val0 = Builder.CreateZExt(Val0, Int64Ty);
    Val1 = Builder.CreateZExt(Val1, Int64Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  }

  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
      BuiltinID == ARM::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
                                                  getContext().getTypeSize(Ty));
    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
                                       ? Intrinsic::arm_ldaex
                                       : Intrinsic::arm_ldrex,
                                   LoadAddr->getType());
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);
    else {
      Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
      return Builder.CreateBitCast(Val, RealResTy);
    }
  }

  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
      ((BuiltinID == ARM::BI__builtin_arm_stlex ||
        BuiltinID == ARM::BI__builtin_arm_strex) &&
       getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlexd
                                       : Intrinsic::arm_strexd);
    llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);

    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
    Value *Val = EmitScalarExpr(E->getArg(0));
    Builder.CreateStore(Val, Tmp);

    Value *LdPtr = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
    Val = Builder.CreateLoad(LdPtr);

    Value *Arg0 = Builder.CreateExtractValue(Val, 0);
    Value *Arg1 = Builder.CreateExtractValue(Val, 1);
    Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
    return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "strexd");
  }

  if (BuiltinID == ARM::BI__builtin_arm_strex ||
      BuiltinID == ARM::BI__builtin_arm_stlex) {
    Value *StoreVal = EmitScalarExpr(E->getArg(0));
    Value *StoreAddr = EmitScalarExpr(E->getArg(1));

    QualType Ty = E->getArg(0)->getType();
    llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
                                                 getContext().getTypeSize(Ty));
    StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());

    if (StoreVal->getType()->isPointerTy())
      StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
    else {
      StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
      StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
    }

    Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
                                       ? Intrinsic::arm_stlex
                                       : Intrinsic::arm_strex,
                                   StoreAddr->getType());
    return Builder.CreateCall2(F, StoreVal, StoreAddr, "strex");
  }

  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
    Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
    return Builder.CreateCall(F);
  }

  // CRC32
  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
  switch (BuiltinID) {
  case ARM::BI__builtin_arm_crc32b:
    CRCIntrinsicID = Intrinsic::arm_crc32b; break;
  case ARM::BI__builtin_arm_crc32cb:
    CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
  case ARM::BI__builtin_arm_crc32h:
    CRCIntrinsicID = Intrinsic::arm_crc32h; break;
  case ARM::BI__builtin_arm_crc32ch:
    CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
  case ARM::BI__builtin_arm_crc32w:
  case ARM::BI__builtin_arm_crc32d:
    CRCIntrinsicID = Intrinsic::arm_crc32w; break;
  case ARM::BI__builtin_arm_crc32cw:
  case ARM::BI__builtin_arm_crc32cd:
    CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
  }

  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
    Value *Arg0 = EmitScalarExpr(E->getArg(0));
    Value *Arg1 = EmitScalarExpr(E->getArg(1));

    // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
    // intrinsics, hence we need different codegen for these cases.
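    // Roughly: crc32d(crc, x) => crc32w(crc32w(crc, lo_32(x)), hi_32(x)),
    // which is what the split below implements.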
    if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
        BuiltinID == ARM::BI__builtin_arm_crc32cd) {
      Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
      Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
      Value *Arg1b = Builder.CreateLShr(Arg1, C1);
      Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      Value *Res = Builder.CreateCall2(F, Arg0, Arg1a);
      return Builder.CreateCall2(F, Res, Arg1b);
    } else {
      Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);

      Function *F = CGM.getIntrinsic(CRCIntrinsicID);
      return Builder.CreateCall2(F, Arg0, Arg1);
    }
  }

  SmallVector<Value*, 4> Ops;
  llvm::Value *Align = nullptr;
  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
    if (i == 0) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld1_v:
      case NEON::BI__builtin_neon_vld1q_v:
      case NEON::BI__builtin_neon_vld1q_lane_v:
      case NEON::BI__builtin_neon_vld1_lane_v:
      case NEON::BI__builtin_neon_vld1_dup_v:
      case NEON::BI__builtin_neon_vld1q_dup_v:
      case NEON::BI__builtin_neon_vst1_v:
      case NEON::BI__builtin_neon_vst1q_v:
      case NEON::BI__builtin_neon_vst1q_lane_v:
      case NEON::BI__builtin_neon_vst1_lane_v:
      case NEON::BI__builtin_neon_vst2_v:
      case NEON::BI__builtin_neon_vst2q_v:
      case NEON::BI__builtin_neon_vst2_lane_v:
      case NEON::BI__builtin_neon_vst2q_lane_v:
      case NEON::BI__builtin_neon_vst3_v:
      case NEON::BI__builtin_neon_vst3q_v:
      case NEON::BI__builtin_neon_vst3_lane_v:
      case NEON::BI__builtin_neon_vst3q_lane_v:
      case NEON::BI__builtin_neon_vst4_v:
      case NEON::BI__builtin_neon_vst4q_v:
      case NEON::BI__builtin_neon_vst4_lane_v:
      case NEON::BI__builtin_neon_vst4q_lane_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(0));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    if (i == 1) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_v:
      case NEON::BI__builtin_neon_vld2q_v:
      case NEON::BI__builtin_neon_vld3_v:
      case NEON::BI__builtin_neon_vld3q_v:
      case NEON::BI__builtin_neon_vld4_v:
      case NEON::BI__builtin_neon_vld4q_v:
      case NEON::BI__builtin_neon_vld2_lane_v:
      case NEON::BI__builtin_neon_vld2q_lane_v:
      case NEON::BI__builtin_neon_vld3_lane_v:
      case NEON::BI__builtin_neon_vld3q_lane_v:
      case NEON::BI__builtin_neon_vld4_lane_v:
      case NEON::BI__builtin_neon_vld4q_lane_v:
      case NEON::BI__builtin_neon_vld2_dup_v:
      case NEON::BI__builtin_neon_vld3_dup_v:
      case NEON::BI__builtin_neon_vld4_dup_v:
        // Get the alignment for the argument in addition to the value;
        // we'll use it later.
        std::pair<llvm::Value*, unsigned> Src =
            EmitPointerWithAlignment(E->getArg(1));
        Ops.push_back(Src.first);
        Align = Builder.getInt32(Src.second);
        continue;
      }
    }
    Ops.push_back(EmitScalarExpr(E->getArg(i)));
  }

  switch (BuiltinID) {
  default: break;
  // vget_lane and vset_lane are not overloaded and do not have an extra
  // argument that specifies the vector type.
  case NEON::BI__builtin_neon_vget_lane_i8:
  case NEON::BI__builtin_neon_vget_lane_i16:
  case NEON::BI__builtin_neon_vget_lane_i32:
  case NEON::BI__builtin_neon_vget_lane_i64:
  case NEON::BI__builtin_neon_vget_lane_f32:
  case NEON::BI__builtin_neon_vgetq_lane_i8:
  case NEON::BI__builtin_neon_vgetq_lane_i16:
  case NEON::BI__builtin_neon_vgetq_lane_i32:
  case NEON::BI__builtin_neon_vgetq_lane_i64:
  case NEON::BI__builtin_neon_vgetq_lane_f32:
    return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
                                        "vget_lane");
  case NEON::BI__builtin_neon_vset_lane_i8:
  case NEON::BI__builtin_neon_vset_lane_i16:
  case NEON::BI__builtin_neon_vset_lane_i32:
  case NEON::BI__builtin_neon_vset_lane_i64:
  case NEON::BI__builtin_neon_vset_lane_f32:
  case NEON::BI__builtin_neon_vsetq_lane_i8:
  case NEON::BI__builtin_neon_vsetq_lane_i16:
  case NEON::BI__builtin_neon_vsetq_lane_i32:
  case NEON::BI__builtin_neon_vsetq_lane_i64:
  case NEON::BI__builtin_neon_vsetq_lane_f32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");

  // Non-polymorphic crypto instructions are likewise not overloaded.
  case NEON::BI__builtin_neon_vsha1h_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
                        "vsha1h");
  case NEON::BI__builtin_neon_vsha1cq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
                        "vsha1c");
  case NEON::BI__builtin_neon_vsha1pq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
                        "vsha1p");
  case NEON::BI__builtin_neon_vsha1mq_u32:
    Ops.push_back(EmitScalarExpr(E->getArg(2)));
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
                        "vsha1m");
  }

  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs()-1);
  if (!Arg->isIntegerConstantExpr(Result, getContext()))
    return nullptr;

  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
      BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
    // Determine the overloaded type of this builtin.
    llvm::Type *Ty;
    if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
      Ty = FloatTy;
    else
      Ty = DoubleTy;

    // Determine whether this is an unsigned conversion or not.
    bool usgn = Result.getZExtValue() == 1;
    unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;

    // Call the appropriate intrinsic.
    Function *F = CGM.getIntrinsic(Int, Ty);
    return Builder.CreateCall(F, Ops, "vcvtr");
  }

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  bool usgn = Type.isUnsigned();
  bool rightShift = false;

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Many NEON builtins have identical semantics and uses in ARM and
  // AArch64. Emit these in a single function.
  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
      IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
  if (Builtin)
    return EmitCommonNeonBuiltinExpr(
        Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
        Builtin->NameHint, Builtin->TypeModifier, E, Ops, Align);

  unsigned Int;
  switch (BuiltinID) {
  default: return nullptr;
  case NEON::BI__builtin_neon_vld1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use shuffles of
    // one-element vectors to avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      // Extract the other lane.
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      // Load the value as a one-element vector.
      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
      Value *Ld = Builder.CreateCall2(F, Ops[0], Align);
      // Combine them.
      SmallVector<Constant*, 2> Indices;
      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
      SV = llvm::ConstantVector::get(Indices);
      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
    }
    // fall through
  case NEON::BI__builtin_neon_vld1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    LoadInst *Ld = Builder.CreateLoad(Ops[0]);
    Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld4_dup_v: {
    // Handle 64-bit elements as a special case. There is no "dup" needed:
    // each result vector has a single lane, so a plain vldN already yields
    // the "duplicated" value.
    if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
      switch (BuiltinID) {
      case NEON::BI__builtin_neon_vld2_dup_v:
        Int = Intrinsic::arm_neon_vld2;
        break;
      case NEON::BI__builtin_neon_vld3_dup_v:
        Int = Intrinsic::arm_neon_vld3;
        break;
      case NEON::BI__builtin_neon_vld4_dup_v:
        Int = Intrinsic::arm_neon_vld4;
        break;
      default: llvm_unreachable("unknown vld_dup intrinsic?");
      }
      Function *F = CGM.getIntrinsic(Int, Ty);
      Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup");
      Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
      Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
      return Builder.CreateStore(Ops[1], Ops[0]);
    }
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld2_dup_v:
      Int = Intrinsic::arm_neon_vld2lane;
      break;
    case NEON::BI__builtin_neon_vld3_dup_v:
      Int = Intrinsic::arm_neon_vld3lane;
      break;
    case NEON::BI__builtin_neon_vld4_dup_v:
      Int = Intrinsic::arm_neon_vld4lane;
      break;
    default: llvm_unreachable("unknown vld_dup intrinsic?");
    }
    Function *F = CGM.getIntrinsic(Int, Ty);
    llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());

    SmallVector<Value*, 6> Args;
    Args.push_back(Ops[1]);
    Args.append(STy->getNumElements(), UndefValue::get(Ty));

    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Args.push_back(CI);
    Args.push_back(Align);

    Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
    // Splat lane 0 to all elements in each vector of the result.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      Value *Val = Builder.CreateExtractValue(Ops[1], i);
      Value *Elt = Builder.CreateBitCast(Val, Ty);
      Elt = EmitNeonSplat(Elt, CI);
      Elt = Builder.CreateBitCast(Elt, Val->getType());
      Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
    }
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vqrshrn_n_v:
    Int =
        usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqrshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
                        Ops, "vqrshrun_n", 1, true);
  case NEON::BI__builtin_neon_vqshrn_n_v:
    Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
                        1, true);
  case NEON::BI__builtin_neon_vqshrun_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
                        Ops, "vqshrun_n", 1, true);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
                        Ops, "vrecpe");
  case NEON::BI__builtin_neon_vrshrn_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
                        Ops, "vrshrn_n", 1, true);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
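    // vrsra_n(a, b, n) is a + rounding_shift_right(b, n); the shift amount
    // was negated above (EmitNeonShiftVector(..., true)) because the vrshift
    // intrinsics encode right shifts as negative amounts -- an illustrative
    // reading, assuming the usual NEON shift convention.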
    Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
    Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Ty), Ops[1], Ops[2]);
    return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v:
    rightShift = true;
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v:
    Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
                        Ops, "vsli_n");
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vst1q_lane_v:
    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
    // a one-element vector and avoid poor code for i64 in the backend.
    if (VTy->getElementType()->isIntegerTy(64)) {
      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
      Ops[2] = Align;
      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
                                                 Ops[1]->getType()), Ops);
    }
    // fall through
  case NEON::BI__builtin_neon_vst1_lane_v: {
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    StoreInst *St = Builder.CreateStore(Ops[1],
                                        Builder.CreateBitCast(Ops[0], Ty));
    St->setAlignment(cast<ConstantInt>(Align)->getZExtValue());
    return St;
  }
  case NEON::BI__builtin_neon_vtbl1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
                        Ops, "vtbl1");
  case NEON::BI__builtin_neon_vtbl2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
                        Ops, "vtbl2");
  case NEON::BI__builtin_neon_vtbl3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
                        Ops, "vtbl3");
  case NEON::BI__builtin_neon_vtbl4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
                        Ops, "vtbl4");
  case NEON::BI__builtin_neon_vtbx1_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
                        Ops, "vtbx1");
  case NEON::BI__builtin_neon_vtbx2_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
                        Ops, "vtbx2");
  case NEON::BI__builtin_neon_vtbx3_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
                        Ops, "vtbx3");
  case NEON::BI__builtin_neon_vtbx4_v:
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
                        Ops, "vtbx4");
  }
}

static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
                                        const CallExpr *E,
                                        SmallVectorImpl<Value *> &Ops) {
  unsigned Int = 0;
  const char *s = nullptr;

  switch (BuiltinID) {
  default:
    return nullptr;
  case NEON::BI__builtin_neon_vtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
  case NEON::BI__builtin_neon_vtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
  case NEON::BI__builtin_neon_vtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
  case NEON::BI__builtin_neon_vtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    break;
  case NEON::BI__builtin_neon_vtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
  case NEON::BI__builtin_neon_vtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
  case NEON::BI__builtin_neon_vtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
  case NEON::BI__builtin_neon_vtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    break;
  }

  assert(E->getNumArgs() >= 3);

  // Get the last argument, which specifies the vector type.
  llvm::APSInt Result;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(Result.getZExtValue());
  llvm::VectorType *VTy = GetNeonType(&CGF, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  unsigned nElts = VTy->getNumElements();

  CodeGen::CGBuilderTy &Builder = CGF.Builder;

  // AArch64 scalar builtins are not overloaded: they do not have an extra
  // argument that specifies the vector type, so each case must be handled
  // individually.
  SmallVector<Value *, 2> TblOps;
  switch (BuiltinID) {
  case NEON::BI__builtin_neon_vtbl1_v: {
    TblOps.push_back(Ops[0]);
    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[1], Ty,
                              Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl2_v: {
    TblOps.push_back(Ops[0]);
    TblOps.push_back(Ops[1]);
    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
                              Intrinsic::aarch64_neon_tbl1, "vtbl1");
  }
  case NEON::BI__builtin_neon_vtbl3_v: {
    TblOps.push_back(Ops[0]);
    TblOps.push_back(Ops[1]);
    TblOps.push_back(Ops[2]);
    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[3], Ty,
                              Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbl4_v: {
    TblOps.push_back(Ops[0]);
    TblOps.push_back(Ops[1]);
    TblOps.push_back(Ops[2]);
    TblOps.push_back(Ops[3]);
    return packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
                              Intrinsic::aarch64_neon_tbl2, "vtbl2");
  }
  case NEON::BI__builtin_neon_vtbx1_v: {
    TblOps.push_back(Ops[1]);
    Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[2], Ty,
                                       Intrinsic::aarch64_neon_tbl1, "vtbl1");

    llvm::Constant *Eight = ConstantInt::get(VTy->getElementType(), 8);
    Value *EightV = llvm::ConstantVector::getSplat(nElts, Eight);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx2_v: {
    TblOps.push_back(Ops[1]);
    TblOps.push_back(Ops[2]);
    return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[3], Ty,
                              Intrinsic::aarch64_neon_tbx1, "vtbx1");
  }
  case NEON::BI__builtin_neon_vtbx3_v: {
    TblOps.push_back(Ops[1]);
    TblOps.push_back(Ops[2]);
    TblOps.push_back(Ops[3]);
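    // vtbx3 packs its three 64-bit tables into two 128-bit tables via tbl2;
    // indices >= 24 (3 tables x 8 bytes) are out of range and must select the
    // destination lane instead, hence the compare against 24 below.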
    Value *TblRes = packTBLDVectorList(CGF, TblOps, nullptr, Ops[4], Ty,
                                       Intrinsic::aarch64_neon_tbl2, "vtbl2");

    llvm::Constant *TwentyFour = ConstantInt::get(VTy->getElementType(), 24);
    Value *TwentyFourV = llvm::ConstantVector::getSplat(nElts, TwentyFour);
    Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
                                       TwentyFourV);
    CmpRes = Builder.CreateSExt(CmpRes, Ty);

    Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
    Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
    return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
  }
  case NEON::BI__builtin_neon_vtbx4_v: {
    TblOps.push_back(Ops[1]);
    TblOps.push_back(Ops[2]);
    TblOps.push_back(Ops[3]);
    TblOps.push_back(Ops[4]);
    return packTBLDVectorList(CGF, TblOps, Ops[0], Ops[5], Ty,
                              Intrinsic::aarch64_neon_tbx2, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbl1_v:
  case NEON::BI__builtin_neon_vqtbl1q_v:
    Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
  case NEON::BI__builtin_neon_vqtbl2_v:
  case NEON::BI__builtin_neon_vqtbl2q_v:
    Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
  case NEON::BI__builtin_neon_vqtbl3_v:
  case NEON::BI__builtin_neon_vqtbl3q_v:
    Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
  case NEON::BI__builtin_neon_vqtbl4_v:
  case NEON::BI__builtin_neon_vqtbl4q_v:
    Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
  case NEON::BI__builtin_neon_vqtbx1_v:
  case NEON::BI__builtin_neon_vqtbx1q_v:
    Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
  case NEON::BI__builtin_neon_vqtbx2_v:
  case NEON::BI__builtin_neon_vqtbx2q_v:
    Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
  case NEON::BI__builtin_neon_vqtbx3_v:
  case NEON::BI__builtin_neon_vqtbx3q_v:
    Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
  case NEON::BI__builtin_neon_vqtbx4_v:
  case NEON::BI__builtin_neon_vqtbx4q_v:
    Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
  }

  if (!Int)
    return nullptr;

  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
  return CGF.EmitNeonCall(F, Ops, s);
}

Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
  Op = Builder.CreateBitCast(Op, Int16Ty);
  Value *V = UndefValue::get(VTy);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Op = Builder.CreateInsertElement(V, Op, CI);
  return Op;
}

Value *CodeGenFunction::vectorWrapScalar8(Value *Op) {
  llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
  Op = Builder.CreateBitCast(Op, Int8Ty);
  Value *V = UndefValue::get(VTy);
  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
  Op = Builder.CreateInsertElement(V, Op, CI);
  return Op;
}

Value *CodeGenFunction::
emitVectorWrappedScalar8Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
                                  const char *Name) {
  // i8 is not a legal type for AArch64, so we can't just use
  // a normal overloaded intrinsic call for these scalar types. Instead
  // we'll build 64-bit vectors w/ lane zero being our input values and
  // perform the operation on that. The back end can pattern match directly
  // to the scalar instruction.
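  // Sketch of the resulting IR for a wrapped i8 operation (illustrative;
  // <intrinsic> stands for whatever Int names):
  //   %l = insertelement <8 x i8> undef, i8 %a, i64 0
  //   %r = insertelement <8 x i8> undef, i8 %b, i64 0
  //   %v = call <8 x i8> @llvm.<intrinsic>.v8i8(<8 x i8> %l, <8 x i8> %r)
  //   %res = extractelement <8 x i8> %v, i64 0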
  Ops[0] = vectorWrapScalar8(Ops[0]);
  Ops[1] = vectorWrapScalar8(Ops[1]);
  llvm::Type *VTy = llvm::VectorType::get(Int8Ty, 8);
  Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
  Constant *CI = ConstantInt::get(SizeTy, 0);
  return Builder.CreateExtractElement(V, CI, "lane0");
}

Value *CodeGenFunction::
emitVectorWrappedScalar16Intrinsic(unsigned Int, SmallVectorImpl<Value*> &Ops,
                                   const char *Name) {
  // i16 is not a legal type for AArch64, so we can't just use
  // a normal overloaded intrinsic call for these scalar types. Instead
  // we'll build 64-bit vectors w/ lane zero being our input values and
  // perform the operation on that. The back end can pattern match directly
  // to the scalar instruction.
  Ops[0] = vectorWrapScalar16(Ops[0]);
  Ops[1] = vectorWrapScalar16(Ops[1]);
  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
  Value *V = EmitNeonCall(CGM.getIntrinsic(Int, VTy), Ops, Name);
  Constant *CI = ConstantInt::get(SizeTy, 0);
  return Builder.CreateExtractElement(V, CI, "lane0");
}

Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  unsigned HintID = static_cast<unsigned>(-1);
  switch (BuiltinID) {
  default: break;
  case AArch64::BI__builtin_arm_nop:
    HintID = 0;
    break;
  case AArch64::BI__builtin_arm_yield:
    HintID = 1;
    break;
  case AArch64::BI__builtin_arm_wfe:
    HintID = 2;
    break;
  case AArch64::BI__builtin_arm_wfi:
    HintID = 3;
    break;
  case AArch64::BI__builtin_arm_sev:
    HintID = 4;
    break;
  case AArch64::BI__builtin_arm_sevl:
    HintID = 5;
    break;
  }

  if (HintID != static_cast<unsigned>(-1)) {
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
    return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
  }

  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = EmitScalarExpr(E->getArg(1));
    Value *CacheLevel = EmitScalarExpr(E->getArg(2));
    Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
    Value *IsData = EmitScalarExpr(E->getArg(4));

    Value *Locality = nullptr;
    if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
      // Temporal fetch: convert the cache level to locality (e.g. cache
      // level 0 maps to locality 3, level 2 to locality 1).
      Locality = llvm::ConstantInt::get(Int32Ty,
          -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
    } else {
      // Streaming fetch.
      Locality = llvm::ConstantInt::get(Int32Ty, 0);
    }

    // FIXME: We need an AArch64-specific LLVM intrinsic if we want to specify
    // PLDL3STRM or PLDL2STRM.
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall4(F, Address, RW, Locality, IsData);
  }

  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
    assert((getContext().getTypeSize(E->getType()) == 32) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
  }
  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
    assert((getContext().getTypeSize(E->getType()) == 64) &&
           "rbit of unusual size!");
    llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
    return Builder.CreateCall(
        CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
  }

  if (BuiltinID == AArch64::BI__clear_cache) {
    assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
    const FunctionDecl *FD = E->getDirectCallee();
    SmallVector<Value*, 2> Ops;
    for (unsigned i = 0; i < 2; i++)
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
    llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
    llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
    StringRef Name = FD->getName();
    return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
  }

  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
      getContext().getTypeSize(E->getType()) == 128) {
    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
                                       ? Intrinsic::aarch64_ldaxp
                                       : Intrinsic::aarch64_ldxp);

    Value *LdPtr = EmitScalarExpr(E->getArg(0));
    Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
                                    "ldxp");

    Value *Val0 = Builder.CreateExtractValue(Val, 1);
    Value *Val1 = Builder.CreateExtractValue(Val, 0);
    llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
    Val0 = Builder.CreateZExt(Val0, Int128Ty);
    Val1 = Builder.CreateZExt(Val1, Int128Ty);

    Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
    Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
    Val = Builder.CreateOr(Val, Val1);
    return Builder.CreateBitCast(Val, ConvertType(E->getType()));
  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
             BuiltinID == AArch64::BI__builtin_arm_ldaex) {
    Value *LoadAddr = EmitScalarExpr(E->getArg(0));

    QualType Ty = E->getType();
    llvm::Type *RealResTy = ConvertType(Ty);
    llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
                                                  getContext().getTypeSize(Ty));
    LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());

    Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
                                       ? Intrinsic::aarch64_ldaxr
                                       : Intrinsic::aarch64_ldxr,
                                   LoadAddr->getType());
    Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");

    if (RealResTy->isPointerTy())
      return Builder.CreateIntToPtr(Val, RealResTy);

    Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
    return Builder.CreateBitCast(Val, RealResTy);
  }

  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
       BuiltinID == AArch64::BI__builtin_arm_stlex) &&
      getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
Intrinsic::aarch64_stlxp 4164 : Intrinsic::aarch64_stxp); 4165 llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr); 4166 4167 Value *One = llvm::ConstantInt::get(Int32Ty, 1); 4168 Value *Tmp = Builder.CreateAlloca(ConvertType(E->getArg(0)->getType()), 4169 One); 4170 Value *Val = EmitScalarExpr(E->getArg(0)); 4171 Builder.CreateStore(Val, Tmp); 4172 4173 Value *LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy)); 4174 Val = Builder.CreateLoad(LdPtr); 4175 4176 Value *Arg0 = Builder.CreateExtractValue(Val, 0); 4177 Value *Arg1 = Builder.CreateExtractValue(Val, 1); 4178 Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), 4179 Int8PtrTy); 4180 return Builder.CreateCall3(F, Arg0, Arg1, StPtr, "stxp"); 4181 } else if (BuiltinID == AArch64::BI__builtin_arm_strex || 4182 BuiltinID == AArch64::BI__builtin_arm_stlex) { 4183 Value *StoreVal = EmitScalarExpr(E->getArg(0)); 4184 Value *StoreAddr = EmitScalarExpr(E->getArg(1)); 4185 4186 QualType Ty = E->getArg(0)->getType(); 4187 llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(), 4188 getContext().getTypeSize(Ty)); 4189 StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo()); 4190 4191 if (StoreVal->getType()->isPointerTy()) 4192 StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty); 4193 else { 4194 StoreVal = Builder.CreateBitCast(StoreVal, StoreTy); 4195 StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty); 4196 } 4197 4198 Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex 4199 ? Intrinsic::aarch64_stlxr 4200 : Intrinsic::aarch64_stxr, 4201 StoreAddr->getType()); 4202 return Builder.CreateCall2(F, StoreVal, StoreAddr, "stxr"); 4203 } 4204 4205 if (BuiltinID == AArch64::BI__builtin_arm_clrex) { 4206 Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex); 4207 return Builder.CreateCall(F); 4208 } 4209 4210 // CRC32 4211 Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic; 4212 switch (BuiltinID) { 4213 case AArch64::BI__builtin_arm_crc32b: 4214 CRCIntrinsicID = Intrinsic::aarch64_crc32b; break; 4215 case AArch64::BI__builtin_arm_crc32cb: 4216 CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break; 4217 case AArch64::BI__builtin_arm_crc32h: 4218 CRCIntrinsicID = Intrinsic::aarch64_crc32h; break; 4219 case AArch64::BI__builtin_arm_crc32ch: 4220 CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break; 4221 case AArch64::BI__builtin_arm_crc32w: 4222 CRCIntrinsicID = Intrinsic::aarch64_crc32w; break; 4223 case AArch64::BI__builtin_arm_crc32cw: 4224 CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break; 4225 case AArch64::BI__builtin_arm_crc32d: 4226 CRCIntrinsicID = Intrinsic::aarch64_crc32x; break; 4227 case AArch64::BI__builtin_arm_crc32cd: 4228 CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break; 4229 } 4230 4231 if (CRCIntrinsicID != Intrinsic::not_intrinsic) { 4232 Value *Arg0 = EmitScalarExpr(E->getArg(0)); 4233 Value *Arg1 = EmitScalarExpr(E->getArg(1)); 4234 Function *F = CGM.getIntrinsic(CRCIntrinsicID); 4235 4236 llvm::Type *DataTy = F->getFunctionType()->getParamType(1); 4237 Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy); 4238 4239 return Builder.CreateCall2(F, Arg0, Arg1); 4240 } 4241 4242 llvm::SmallVector<Value*, 4> Ops; 4243 for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) 4244 Ops.push_back(EmitScalarExpr(E->getArg(i))); 4245 4246 auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap); 4247 const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap( 4248 SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted); 4249 4250 if (Builtin) { 
4251     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
4252     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
4253     assert(Result && "SISD intrinsic should have been handled");
4254     return Result;
4255   }
4256
4257   llvm::APSInt Result;
4258   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4259   NeonTypeFlags Type(0);
4260   if (Arg->isIntegerConstantExpr(Result, getContext()))
4261     // Determine the type of this overloaded NEON intrinsic.
4262     Type = NeonTypeFlags(Result.getZExtValue());
4263
4264   bool usgn = Type.isUnsigned();
4265   bool quad = Type.isQuad();
4266
4267   // Handle non-overloaded intrinsics first.
4268   switch (BuiltinID) {
4269   default: break;
4270   case NEON::BI__builtin_neon_vldrq_p128: {
4271     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4272     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
4273     return Builder.CreateLoad(Ptr);
4274   }
4275   case NEON::BI__builtin_neon_vstrq_p128: {
4276     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
4277     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
4278     return Builder.CreateStore(EmitScalarExpr(E->getArg(1)), Ptr);
4279   }
4280   case NEON::BI__builtin_neon_vcvts_u32_f32:
4281   case NEON::BI__builtin_neon_vcvtd_u64_f64:
4282     usgn = true;
4283     // FALL THROUGH
4284   case NEON::BI__builtin_neon_vcvts_s32_f32:
4285   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
4286     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4287     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4288     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4289     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4290     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
4291     if (usgn)
4292       return Builder.CreateFPToUI(Ops[0], InTy);
4293     return Builder.CreateFPToSI(Ops[0], InTy);
4294   }
4295   case NEON::BI__builtin_neon_vcvts_f32_u32:
4296   case NEON::BI__builtin_neon_vcvtd_f64_u64:
4297     usgn = true;
4298     // FALL THROUGH
4299   case NEON::BI__builtin_neon_vcvts_f32_s32:
4300   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
4301     Ops.push_back(EmitScalarExpr(E->getArg(0)));
4302     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
4303     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
4304     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
4305     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
4306     if (usgn)
4307       return Builder.CreateUIToFP(Ops[0], FTy);
4308     return Builder.CreateSIToFP(Ops[0], FTy);
4309   }
4310   case NEON::BI__builtin_neon_vpaddd_s64: {
4311     llvm::Type *Ty =
4312       llvm::VectorType::get(llvm::Type::getInt64Ty(getLLVMContext()), 2);
4313     Value *Vec = EmitScalarExpr(E->getArg(0));
4314     // The vector is v2i64, so make sure it's bitcast to that.
4315     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
4316     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
4317     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
4318     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
4319     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
4320     // Pairwise addition of a v2i64 into a scalar i64.
4321     return Builder.CreateAdd(Op0, Op1, "vpaddd");
4322   }
4323   case NEON::BI__builtin_neon_vpaddd_f64: {
4324     llvm::Type *Ty =
4325       llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2);
4326     Value *Vec = EmitScalarExpr(E->getArg(0));
4327     // The vector is v2f64, so make sure it's bitcast to that.
4328 Vec = Builder.CreateBitCast(Vec, Ty, "v2f64"); 4329 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4330 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4331 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4332 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4333 // Pairwise addition of a v2f64 into a scalar f64. 4334 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4335 } 4336 case NEON::BI__builtin_neon_vpadds_f32: { 4337 llvm::Type *Ty = 4338 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2); 4339 Value *Vec = EmitScalarExpr(E->getArg(0)); 4340 // The vector is v2f32, so make sure it's bitcast to that. 4341 Vec = Builder.CreateBitCast(Vec, Ty, "v2f32"); 4342 llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0); 4343 llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1); 4344 Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0"); 4345 Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1"); 4346 // Pairwise addition of a v2f32 into a scalar f32. 4347 return Builder.CreateFAdd(Op0, Op1, "vpaddd"); 4348 } 4349 case NEON::BI__builtin_neon_vceqzd_s64: 4350 case NEON::BI__builtin_neon_vceqzd_f64: 4351 case NEON::BI__builtin_neon_vceqzs_f32: 4352 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4353 return EmitAArch64CompareBuiltinExpr( 4354 Ops[0], ConvertType(E->getCallReturnType(getContext())), 4355 ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz"); 4356 case NEON::BI__builtin_neon_vcgezd_s64: 4357 case NEON::BI__builtin_neon_vcgezd_f64: 4358 case NEON::BI__builtin_neon_vcgezs_f32: 4359 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4360 return EmitAArch64CompareBuiltinExpr( 4361 Ops[0], ConvertType(E->getCallReturnType(getContext())), 4362 ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez"); 4363 case NEON::BI__builtin_neon_vclezd_s64: 4364 case NEON::BI__builtin_neon_vclezd_f64: 4365 case NEON::BI__builtin_neon_vclezs_f32: 4366 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4367 return EmitAArch64CompareBuiltinExpr( 4368 Ops[0], ConvertType(E->getCallReturnType(getContext())), 4369 ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez"); 4370 case NEON::BI__builtin_neon_vcgtzd_s64: 4371 case NEON::BI__builtin_neon_vcgtzd_f64: 4372 case NEON::BI__builtin_neon_vcgtzs_f32: 4373 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4374 return EmitAArch64CompareBuiltinExpr( 4375 Ops[0], ConvertType(E->getCallReturnType(getContext())), 4376 ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz"); 4377 case NEON::BI__builtin_neon_vcltzd_s64: 4378 case NEON::BI__builtin_neon_vcltzd_f64: 4379 case NEON::BI__builtin_neon_vcltzs_f32: 4380 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4381 return EmitAArch64CompareBuiltinExpr( 4382 Ops[0], ConvertType(E->getCallReturnType(getContext())), 4383 ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz"); 4384 4385 case NEON::BI__builtin_neon_vceqzd_u64: { 4386 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4387 Ops.push_back(EmitScalarExpr(E->getArg(0))); 4388 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4389 Ops[0] = Builder.CreateICmp(llvm::ICmpInst::ICMP_EQ, Ops[0], 4390 llvm::Constant::getNullValue(Ty)); 4391 return Builder.CreateSExt(Ops[0], Ty, "vceqzd"); 4392 } 4393 case NEON::BI__builtin_neon_vceqd_f64: 4394 case NEON::BI__builtin_neon_vcled_f64: 4395 case NEON::BI__builtin_neon_vcltd_f64: 4396 case NEON::BI__builtin_neon_vcged_f64: 4397 case NEON::BI__builtin_neon_vcgtd_f64: { 4398 llvm::CmpInst::Predicate P; 4399 switch (BuiltinID) { 4400 default: llvm_unreachable("missing builtin ID in switch!"); 
4401 case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break; 4402 case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break; 4403 case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break; 4404 case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break; 4405 case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break; 4406 } 4407 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4408 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4409 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4410 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4411 return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd"); 4412 } 4413 case NEON::BI__builtin_neon_vceqs_f32: 4414 case NEON::BI__builtin_neon_vcles_f32: 4415 case NEON::BI__builtin_neon_vclts_f32: 4416 case NEON::BI__builtin_neon_vcges_f32: 4417 case NEON::BI__builtin_neon_vcgts_f32: { 4418 llvm::CmpInst::Predicate P; 4419 switch (BuiltinID) { 4420 default: llvm_unreachable("missing builtin ID in switch!"); 4421 case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break; 4422 case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break; 4423 case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break; 4424 case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break; 4425 case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break; 4426 } 4427 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4428 Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy); 4429 Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy); 4430 Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]); 4431 return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd"); 4432 } 4433 case NEON::BI__builtin_neon_vceqd_s64: 4434 case NEON::BI__builtin_neon_vceqd_u64: 4435 case NEON::BI__builtin_neon_vcgtd_s64: 4436 case NEON::BI__builtin_neon_vcgtd_u64: 4437 case NEON::BI__builtin_neon_vcltd_s64: 4438 case NEON::BI__builtin_neon_vcltd_u64: 4439 case NEON::BI__builtin_neon_vcged_u64: 4440 case NEON::BI__builtin_neon_vcged_s64: 4441 case NEON::BI__builtin_neon_vcled_u64: 4442 case NEON::BI__builtin_neon_vcled_s64: { 4443 llvm::CmpInst::Predicate P; 4444 switch (BuiltinID) { 4445 default: llvm_unreachable("missing builtin ID in switch!"); 4446 case NEON::BI__builtin_neon_vceqd_s64: 4447 case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break; 4448 case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break; 4449 case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break; 4450 case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break; 4451 case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break; 4452 case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break; 4453 case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break; 4454 case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break; 4455 case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break; 4456 } 4457 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4458 Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty); 4459 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4460 Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]); 4461 return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd"); 4462 } 4463 case NEON::BI__builtin_neon_vtstd_s64: 4464 case NEON::BI__builtin_neon_vtstd_u64: { 4465 llvm::Type *Ty = llvm::Type::getInt64Ty(getLLVMContext()); 4466 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4467 Ops[0] = 
Builder.CreateBitCast(Ops[0], Ty); 4468 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4469 Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]); 4470 Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0], 4471 llvm::Constant::getNullValue(Ty)); 4472 return Builder.CreateSExt(Ops[0], Ty, "vtstd"); 4473 } 4474 case NEON::BI__builtin_neon_vset_lane_i8: 4475 case NEON::BI__builtin_neon_vset_lane_i16: 4476 case NEON::BI__builtin_neon_vset_lane_i32: 4477 case NEON::BI__builtin_neon_vset_lane_i64: 4478 case NEON::BI__builtin_neon_vset_lane_f32: 4479 case NEON::BI__builtin_neon_vsetq_lane_i8: 4480 case NEON::BI__builtin_neon_vsetq_lane_i16: 4481 case NEON::BI__builtin_neon_vsetq_lane_i32: 4482 case NEON::BI__builtin_neon_vsetq_lane_i64: 4483 case NEON::BI__builtin_neon_vsetq_lane_f32: 4484 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4485 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4486 case NEON::BI__builtin_neon_vset_lane_f64: 4487 // The vector type needs a cast for the v1f64 variant. 4488 Ops[1] = Builder.CreateBitCast(Ops[1], 4489 llvm::VectorType::get(DoubleTy, 1)); 4490 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4491 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4492 case NEON::BI__builtin_neon_vsetq_lane_f64: 4493 // The vector type needs a cast for the v2f64 variant. 4494 Ops[1] = Builder.CreateBitCast(Ops[1], 4495 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4496 Ops.push_back(EmitScalarExpr(E->getArg(2))); 4497 return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane"); 4498 4499 case NEON::BI__builtin_neon_vget_lane_i8: 4500 case NEON::BI__builtin_neon_vdupb_lane_i8: 4501 Ops[0] = Builder.CreateBitCast(Ops[0], 4502 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8)); 4503 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4504 "vget_lane"); 4505 case NEON::BI__builtin_neon_vgetq_lane_i8: 4506 case NEON::BI__builtin_neon_vdupb_laneq_i8: 4507 Ops[0] = Builder.CreateBitCast(Ops[0], 4508 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16)); 4509 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4510 "vgetq_lane"); 4511 case NEON::BI__builtin_neon_vget_lane_i16: 4512 case NEON::BI__builtin_neon_vduph_lane_i16: 4513 Ops[0] = Builder.CreateBitCast(Ops[0], 4514 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4)); 4515 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4516 "vget_lane"); 4517 case NEON::BI__builtin_neon_vgetq_lane_i16: 4518 case NEON::BI__builtin_neon_vduph_laneq_i16: 4519 Ops[0] = Builder.CreateBitCast(Ops[0], 4520 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8)); 4521 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4522 "vgetq_lane"); 4523 case NEON::BI__builtin_neon_vget_lane_i32: 4524 case NEON::BI__builtin_neon_vdups_lane_i32: 4525 Ops[0] = Builder.CreateBitCast( 4526 Ops[0], 4527 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 2)); 4528 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4529 "vget_lane"); 4530 case NEON::BI__builtin_neon_vdups_lane_f32: 4531 Ops[0] = Builder.CreateBitCast(Ops[0], 4532 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4533 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4534 "vdups_lane"); 4535 case NEON::BI__builtin_neon_vgetq_lane_i32: 4536 case NEON::BI__builtin_neon_vdups_laneq_i32: 4537 
Ops[0] = Builder.CreateBitCast(Ops[0], 4538 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 32), 4)); 4539 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4540 "vgetq_lane"); 4541 case NEON::BI__builtin_neon_vget_lane_i64: 4542 case NEON::BI__builtin_neon_vdupd_lane_i64: 4543 Ops[0] = Builder.CreateBitCast(Ops[0], 4544 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 1)); 4545 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4546 "vget_lane"); 4547 case NEON::BI__builtin_neon_vdupd_lane_f64: 4548 Ops[0] = Builder.CreateBitCast(Ops[0], 4549 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4550 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4551 "vdupd_lane"); 4552 case NEON::BI__builtin_neon_vgetq_lane_i64: 4553 case NEON::BI__builtin_neon_vdupd_laneq_i64: 4554 Ops[0] = Builder.CreateBitCast(Ops[0], 4555 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 64), 2)); 4556 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4557 "vgetq_lane"); 4558 case NEON::BI__builtin_neon_vget_lane_f32: 4559 Ops[0] = Builder.CreateBitCast(Ops[0], 4560 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 2)); 4561 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4562 "vget_lane"); 4563 case NEON::BI__builtin_neon_vget_lane_f64: 4564 Ops[0] = Builder.CreateBitCast(Ops[0], 4565 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 1)); 4566 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4567 "vget_lane"); 4568 case NEON::BI__builtin_neon_vgetq_lane_f32: 4569 case NEON::BI__builtin_neon_vdups_laneq_f32: 4570 Ops[0] = Builder.CreateBitCast(Ops[0], 4571 llvm::VectorType::get(llvm::Type::getFloatTy(getLLVMContext()), 4)); 4572 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4573 "vgetq_lane"); 4574 case NEON::BI__builtin_neon_vgetq_lane_f64: 4575 case NEON::BI__builtin_neon_vdupd_laneq_f64: 4576 Ops[0] = Builder.CreateBitCast(Ops[0], 4577 llvm::VectorType::get(llvm::Type::getDoubleTy(getLLVMContext()), 2)); 4578 return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)), 4579 "vgetq_lane"); 4580 case NEON::BI__builtin_neon_vaddd_s64: 4581 case NEON::BI__builtin_neon_vaddd_u64: 4582 return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd"); 4583 case NEON::BI__builtin_neon_vsubd_s64: 4584 case NEON::BI__builtin_neon_vsubd_u64: 4585 return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd"); 4586 case NEON::BI__builtin_neon_vqdmlalh_s16: 4587 case NEON::BI__builtin_neon_vqdmlslh_s16: { 4588 SmallVector<Value *, 2> ProductOps; 4589 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4590 ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2)))); 4591 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4592 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4593 ProductOps, "vqdmlXl"); 4594 Constant *CI = ConstantInt::get(SizeTy, 0); 4595 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4596 4597 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16 4598 ? 
Intrinsic::aarch64_neon_sqadd 4599 : Intrinsic::aarch64_neon_sqsub; 4600 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl"); 4601 } 4602 case NEON::BI__builtin_neon_vqshlud_n_s64: { 4603 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4604 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4605 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty), 4606 Ops, "vqshlu_n"); 4607 } 4608 case NEON::BI__builtin_neon_vqshld_n_u64: 4609 case NEON::BI__builtin_neon_vqshld_n_s64: { 4610 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64 4611 ? Intrinsic::aarch64_neon_uqshl 4612 : Intrinsic::aarch64_neon_sqshl; 4613 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4614 Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty); 4615 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n"); 4616 } 4617 case NEON::BI__builtin_neon_vrshrd_n_u64: 4618 case NEON::BI__builtin_neon_vrshrd_n_s64: { 4619 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64 4620 ? Intrinsic::aarch64_neon_urshl 4621 : Intrinsic::aarch64_neon_srshl; 4622 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4623 int SV = cast<ConstantInt>(Ops[1])->getSExtValue(); 4624 Ops[1] = ConstantInt::get(Int64Ty, -SV); 4625 return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n"); 4626 } 4627 case NEON::BI__builtin_neon_vrsrad_n_u64: 4628 case NEON::BI__builtin_neon_vrsrad_n_s64: { 4629 unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64 4630 ? Intrinsic::aarch64_neon_urshl 4631 : Intrinsic::aarch64_neon_srshl; 4632 Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty); 4633 Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2)))); 4634 Ops[1] = Builder.CreateCall2(CGM.getIntrinsic(Int, Int64Ty), Ops[1], 4635 Builder.CreateSExt(Ops[2], Int64Ty)); 4636 return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty)); 4637 } 4638 case NEON::BI__builtin_neon_vshld_n_s64: 4639 case NEON::BI__builtin_neon_vshld_n_u64: { 4640 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4641 return Builder.CreateShl( 4642 Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n"); 4643 } 4644 case NEON::BI__builtin_neon_vshrd_n_s64: { 4645 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4646 return Builder.CreateAShr( 4647 Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4648 Amt->getZExtValue())), 4649 "shrd_n"); 4650 } 4651 case NEON::BI__builtin_neon_vshrd_n_u64: { 4652 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1))); 4653 uint64_t ShiftAmt = Amt->getZExtValue(); 4654 // Right-shifting an unsigned value by its size yields 0. 4655 if (ShiftAmt == 64) 4656 return ConstantInt::get(Int64Ty, 0); 4657 return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt), 4658 "shrd_n"); 4659 } 4660 case NEON::BI__builtin_neon_vsrad_n_s64: { 4661 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4662 Ops[1] = Builder.CreateAShr( 4663 Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63), 4664 Amt->getZExtValue())), 4665 "shrd_n"); 4666 return Builder.CreateAdd(Ops[0], Ops[1]); 4667 } 4668 case NEON::BI__builtin_neon_vsrad_n_u64: { 4669 llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2))); 4670 uint64_t ShiftAmt = Amt->getZExtValue(); 4671 // Right-shifting an unsigned value by its size yields 0. 4672 // As Op + 0 = Op, return Ops[0] directly. 
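    // (Contrast with vsrad_n_s64 above, where the arithmetic shift is instead
    // clamped to 63: a 64-bit arithmetic shift right by 63 still propagates
    // the sign bit, e.g. -1 >> 63 == -1, so the signed accumulate never
    // degenerates to returning Ops[0] unchanged.)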
4673 if (ShiftAmt == 64) 4674 return Ops[0]; 4675 Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt), 4676 "shrd_n"); 4677 return Builder.CreateAdd(Ops[0], Ops[1]); 4678 } 4679 case NEON::BI__builtin_neon_vqdmlalh_lane_s16: 4680 case NEON::BI__builtin_neon_vqdmlalh_laneq_s16: 4681 case NEON::BI__builtin_neon_vqdmlslh_lane_s16: 4682 case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: { 4683 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4684 "lane"); 4685 SmallVector<Value *, 2> ProductOps; 4686 ProductOps.push_back(vectorWrapScalar16(Ops[1])); 4687 ProductOps.push_back(vectorWrapScalar16(Ops[2])); 4688 llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4); 4689 Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy), 4690 ProductOps, "vqdmlXl"); 4691 Constant *CI = ConstantInt::get(SizeTy, 0); 4692 Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0"); 4693 Ops.pop_back(); 4694 4695 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 || 4696 BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16) 4697 ? Intrinsic::aarch64_neon_sqadd 4698 : Intrinsic::aarch64_neon_sqsub; 4699 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl"); 4700 } 4701 case NEON::BI__builtin_neon_vqdmlals_s32: 4702 case NEON::BI__builtin_neon_vqdmlsls_s32: { 4703 SmallVector<Value *, 2> ProductOps; 4704 ProductOps.push_back(Ops[1]); 4705 ProductOps.push_back(EmitScalarExpr(E->getArg(2))); 4706 Ops[1] = 4707 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4708 ProductOps, "vqdmlXl"); 4709 4710 unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32 4711 ? Intrinsic::aarch64_neon_sqadd 4712 : Intrinsic::aarch64_neon_sqsub; 4713 return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl"); 4714 } 4715 case NEON::BI__builtin_neon_vqdmlals_lane_s32: 4716 case NEON::BI__builtin_neon_vqdmlals_laneq_s32: 4717 case NEON::BI__builtin_neon_vqdmlsls_lane_s32: 4718 case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: { 4719 Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)), 4720 "lane"); 4721 SmallVector<Value *, 2> ProductOps; 4722 ProductOps.push_back(Ops[1]); 4723 ProductOps.push_back(Ops[2]); 4724 Ops[1] = 4725 EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar), 4726 ProductOps, "vqdmlXl"); 4727 Ops.pop_back(); 4728 4729 unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 || 4730 BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32) 4731 ? Intrinsic::aarch64_neon_sqadd 4732 : Intrinsic::aarch64_neon_sqsub; 4733 return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl"); 4734 } 4735 } 4736 4737 llvm::VectorType *VTy = GetNeonType(this, Type); 4738 llvm::Type *Ty = VTy; 4739 if (!Ty) 4740 return nullptr; 4741 4742 // Not all intrinsics handled by the common case work for AArch64 yet, so only 4743 // defer to common code if it's been added to our special map. 
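  // (A note on the lookup below: as with the SISD dispatch earlier in this
  // function, findNeonIntrinsicInMap searches the table by builtin ID, and
  // the AArch64SIMDIntrinsicsProvenSorted flag lets the helper check once,
  // in asserts builds, that the table really is sorted for binary search.)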
4744 Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID, 4745 AArch64SIMDIntrinsicsProvenSorted); 4746 4747 if (Builtin) 4748 return EmitCommonNeonBuiltinExpr( 4749 Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic, 4750 Builtin->NameHint, Builtin->TypeModifier, E, Ops, nullptr); 4751 4752 if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops)) 4753 return V; 4754 4755 unsigned Int; 4756 switch (BuiltinID) { 4757 default: return nullptr; 4758 case NEON::BI__builtin_neon_vbsl_v: 4759 case NEON::BI__builtin_neon_vbslq_v: { 4760 llvm::Type *BitTy = llvm::VectorType::getInteger(VTy); 4761 Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl"); 4762 Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl"); 4763 Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl"); 4764 4765 Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl"); 4766 Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl"); 4767 Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl"); 4768 return Builder.CreateBitCast(Ops[0], Ty); 4769 } 4770 case NEON::BI__builtin_neon_vfma_lane_v: 4771 case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types 4772 // The ARM builtins (and instructions) have the addend as the first 4773 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4774 Value *Addend = Ops[0]; 4775 Value *Multiplicand = Ops[1]; 4776 Value *LaneSource = Ops[2]; 4777 Ops[0] = Multiplicand; 4778 Ops[1] = LaneSource; 4779 Ops[2] = Addend; 4780 4781 // Now adjust things to handle the lane access. 4782 llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ? 4783 llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) : 4784 VTy; 4785 llvm::Constant *cst = cast<Constant>(Ops[3]); 4786 Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst); 4787 Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy); 4788 Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane"); 4789 4790 Ops.pop_back(); 4791 Int = Intrinsic::fma; 4792 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla"); 4793 } 4794 case NEON::BI__builtin_neon_vfma_laneq_v: { 4795 llvm::VectorType *VTy = cast<llvm::VectorType>(Ty); 4796 // v1f64 fma should be mapped to Neon scalar f64 fma 4797 if (VTy && VTy->getElementType() == DoubleTy) { 4798 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 4799 Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy); 4800 llvm::Type *VTy = GetNeonType(this, 4801 NeonTypeFlags(NeonTypeFlags::Float64, false, true)); 4802 Ops[2] = Builder.CreateBitCast(Ops[2], VTy); 4803 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4804 Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy); 4805 Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4806 return Builder.CreateBitCast(Result, Ty); 4807 } 4808 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4809 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 4810 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4811 4812 llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(), 4813 VTy->getNumElements() * 2); 4814 Ops[2] = Builder.CreateBitCast(Ops[2], STy); 4815 Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), 4816 cast<ConstantInt>(Ops[3])); 4817 Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane"); 4818 4819 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4820 } 4821 case NEON::BI__builtin_neon_vfmaq_laneq_v: { 4822 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4823 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 
4824 Ops[1] = Builder.CreateBitCast(Ops[1], Ty); 4825 4826 Ops[2] = Builder.CreateBitCast(Ops[2], Ty); 4827 Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3])); 4828 return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]); 4829 } 4830 case NEON::BI__builtin_neon_vfmas_lane_f32: 4831 case NEON::BI__builtin_neon_vfmas_laneq_f32: 4832 case NEON::BI__builtin_neon_vfmad_lane_f64: 4833 case NEON::BI__builtin_neon_vfmad_laneq_f64: { 4834 Ops.push_back(EmitScalarExpr(E->getArg(3))); 4835 llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext())); 4836 Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty); 4837 Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); 4838 return Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); 4839 } 4840 case NEON::BI__builtin_neon_vfms_v: 4841 case NEON::BI__builtin_neon_vfmsq_v: { // Only used for FP types 4842 // FIXME: probably remove when we no longer support aarch64_simd.h 4843 // (arm_neon.h delegates to vfma). 4844 4845 // The ARM builtins (and instructions) have the addend as the first 4846 // operand, but the 'fma' intrinsics have it last. Swap it around here. 4847 Value *Subtrahend = Ops[0]; 4848 Value *Multiplicand = Ops[2]; 4849 Ops[0] = Multiplicand; 4850 Ops[2] = Subtrahend; 4851 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 4852 Ops[1] = Builder.CreateFNeg(Ops[1]); 4853 Int = Intrinsic::fma; 4854 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmls"); 4855 } 4856 case NEON::BI__builtin_neon_vmull_v: 4857 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4858 Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull; 4859 if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull; 4860 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull"); 4861 case NEON::BI__builtin_neon_vmax_v: 4862 case NEON::BI__builtin_neon_vmaxq_v: 4863 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4864 Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax; 4865 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax; 4866 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax"); 4867 case NEON::BI__builtin_neon_vmin_v: 4868 case NEON::BI__builtin_neon_vminq_v: 4869 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4870 Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin; 4871 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin; 4872 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin"); 4873 case NEON::BI__builtin_neon_vabd_v: 4874 case NEON::BI__builtin_neon_vabdq_v: 4875 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4876 Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd; 4877 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd; 4878 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd"); 4879 case NEON::BI__builtin_neon_vpadal_v: 4880 case NEON::BI__builtin_neon_vpadalq_v: { 4881 unsigned ArgElts = VTy->getNumElements(); 4882 llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType()); 4883 unsigned BitWidth = EltTy->getBitWidth(); 4884 llvm::Type *ArgTy = llvm::VectorType::get( 4885 llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts); 4886 llvm::Type* Tys[2] = { VTy, ArgTy }; 4887 Int = usgn ? 
Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp; 4888 SmallVector<llvm::Value*, 1> TmpOps; 4889 TmpOps.push_back(Ops[1]); 4890 Function *F = CGM.getIntrinsic(Int, Tys); 4891 llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal"); 4892 llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType()); 4893 return Builder.CreateAdd(tmp, addend); 4894 } 4895 case NEON::BI__builtin_neon_vpmin_v: 4896 case NEON::BI__builtin_neon_vpminq_v: 4897 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4898 Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp; 4899 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp; 4900 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin"); 4901 case NEON::BI__builtin_neon_vpmax_v: 4902 case NEON::BI__builtin_neon_vpmaxq_v: 4903 // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics. 4904 Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp; 4905 if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp; 4906 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax"); 4907 case NEON::BI__builtin_neon_vminnm_v: 4908 case NEON::BI__builtin_neon_vminnmq_v: 4909 Int = Intrinsic::aarch64_neon_fminnm; 4910 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm"); 4911 case NEON::BI__builtin_neon_vmaxnm_v: 4912 case NEON::BI__builtin_neon_vmaxnmq_v: 4913 Int = Intrinsic::aarch64_neon_fmaxnm; 4914 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm"); 4915 case NEON::BI__builtin_neon_vrecpss_f32: { 4916 llvm::Type *f32Type = llvm::Type::getFloatTy(getLLVMContext()); 4917 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4918 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f32Type), 4919 Ops, "vrecps"); 4920 } 4921 case NEON::BI__builtin_neon_vrecpsd_f64: { 4922 llvm::Type *f64Type = llvm::Type::getDoubleTy(getLLVMContext()); 4923 Ops.push_back(EmitScalarExpr(E->getArg(1))); 4924 return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, f64Type), 4925 Ops, "vrecps"); 4926 } 4927 case NEON::BI__builtin_neon_vqshrun_n_v: 4928 Int = Intrinsic::aarch64_neon_sqshrun; 4929 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n"); 4930 case NEON::BI__builtin_neon_vqrshrun_n_v: 4931 Int = Intrinsic::aarch64_neon_sqrshrun; 4932 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n"); 4933 case NEON::BI__builtin_neon_vqshrn_n_v: 4934 Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn; 4935 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n"); 4936 case NEON::BI__builtin_neon_vrshrn_n_v: 4937 Int = Intrinsic::aarch64_neon_rshrn; 4938 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n"); 4939 case NEON::BI__builtin_neon_vqrshrn_n_v: 4940 Int = usgn ? 
Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn; 4941 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n"); 4942 case NEON::BI__builtin_neon_vrnda_v: 4943 case NEON::BI__builtin_neon_vrndaq_v: { 4944 Int = Intrinsic::round; 4945 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda"); 4946 } 4947 case NEON::BI__builtin_neon_vrndi_v: 4948 case NEON::BI__builtin_neon_vrndiq_v: { 4949 Int = Intrinsic::nearbyint; 4950 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi"); 4951 } 4952 case NEON::BI__builtin_neon_vrndm_v: 4953 case NEON::BI__builtin_neon_vrndmq_v: { 4954 Int = Intrinsic::floor; 4955 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm"); 4956 } 4957 case NEON::BI__builtin_neon_vrndn_v: 4958 case NEON::BI__builtin_neon_vrndnq_v: { 4959 Int = Intrinsic::aarch64_neon_frintn; 4960 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn"); 4961 } 4962 case NEON::BI__builtin_neon_vrndp_v: 4963 case NEON::BI__builtin_neon_vrndpq_v: { 4964 Int = Intrinsic::ceil; 4965 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp"); 4966 } 4967 case NEON::BI__builtin_neon_vrndx_v: 4968 case NEON::BI__builtin_neon_vrndxq_v: { 4969 Int = Intrinsic::rint; 4970 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx"); 4971 } 4972 case NEON::BI__builtin_neon_vrnd_v: 4973 case NEON::BI__builtin_neon_vrndq_v: { 4974 Int = Intrinsic::trunc; 4975 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz"); 4976 } 4977 case NEON::BI__builtin_neon_vceqz_v: 4978 case NEON::BI__builtin_neon_vceqzq_v: 4979 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ, 4980 ICmpInst::ICMP_EQ, "vceqz"); 4981 case NEON::BI__builtin_neon_vcgez_v: 4982 case NEON::BI__builtin_neon_vcgezq_v: 4983 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE, 4984 ICmpInst::ICMP_SGE, "vcgez"); 4985 case NEON::BI__builtin_neon_vclez_v: 4986 case NEON::BI__builtin_neon_vclezq_v: 4987 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE, 4988 ICmpInst::ICMP_SLE, "vclez"); 4989 case NEON::BI__builtin_neon_vcgtz_v: 4990 case NEON::BI__builtin_neon_vcgtzq_v: 4991 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT, 4992 ICmpInst::ICMP_SGT, "vcgtz"); 4993 case NEON::BI__builtin_neon_vcltz_v: 4994 case NEON::BI__builtin_neon_vcltzq_v: 4995 return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT, 4996 ICmpInst::ICMP_SLT, "vcltz"); 4997 case NEON::BI__builtin_neon_vcvt_f64_v: 4998 case NEON::BI__builtin_neon_vcvtq_f64_v: 4999 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5000 Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad)); 5001 return usgn ? 
Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 5002 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt"); 5003 case NEON::BI__builtin_neon_vcvt_f64_f32: { 5004 assert(Type.getEltType() == NeonTypeFlags::Float64 && quad && 5005 "unexpected vcvt_f64_f32 builtin"); 5006 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false); 5007 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5008 5009 return Builder.CreateFPExt(Ops[0], Ty, "vcvt"); 5010 } 5011 case NEON::BI__builtin_neon_vcvt_f32_f64: { 5012 assert(Type.getEltType() == NeonTypeFlags::Float32 && 5013 "unexpected vcvt_f32_f64 builtin"); 5014 NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true); 5015 Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag)); 5016 5017 return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt"); 5018 } 5019 case NEON::BI__builtin_neon_vcvt_s32_v: 5020 case NEON::BI__builtin_neon_vcvt_u32_v: 5021 case NEON::BI__builtin_neon_vcvt_s64_v: 5022 case NEON::BI__builtin_neon_vcvt_u64_v: 5023 case NEON::BI__builtin_neon_vcvtq_s32_v: 5024 case NEON::BI__builtin_neon_vcvtq_u32_v: 5025 case NEON::BI__builtin_neon_vcvtq_s64_v: 5026 case NEON::BI__builtin_neon_vcvtq_u64_v: { 5027 bool Double = 5028 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5029 llvm::Type *InTy = 5030 GetNeonType(this, 5031 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5032 : NeonTypeFlags::Float32, false, quad)); 5033 Ops[0] = Builder.CreateBitCast(Ops[0], InTy); 5034 if (usgn) 5035 return Builder.CreateFPToUI(Ops[0], Ty); 5036 return Builder.CreateFPToSI(Ops[0], Ty); 5037 } 5038 case NEON::BI__builtin_neon_vcvta_s32_v: 5039 case NEON::BI__builtin_neon_vcvtaq_s32_v: 5040 case NEON::BI__builtin_neon_vcvta_u32_v: 5041 case NEON::BI__builtin_neon_vcvtaq_u32_v: 5042 case NEON::BI__builtin_neon_vcvta_s64_v: 5043 case NEON::BI__builtin_neon_vcvtaq_s64_v: 5044 case NEON::BI__builtin_neon_vcvta_u64_v: 5045 case NEON::BI__builtin_neon_vcvtaq_u64_v: { 5046 Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas; 5047 bool Double = 5048 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5049 llvm::Type *InTy = 5050 GetNeonType(this, 5051 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5052 : NeonTypeFlags::Float32, false, quad)); 5053 llvm::Type *Tys[2] = { Ty, InTy }; 5054 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta"); 5055 } 5056 case NEON::BI__builtin_neon_vcvtm_s32_v: 5057 case NEON::BI__builtin_neon_vcvtmq_s32_v: 5058 case NEON::BI__builtin_neon_vcvtm_u32_v: 5059 case NEON::BI__builtin_neon_vcvtmq_u32_v: 5060 case NEON::BI__builtin_neon_vcvtm_s64_v: 5061 case NEON::BI__builtin_neon_vcvtmq_s64_v: 5062 case NEON::BI__builtin_neon_vcvtm_u64_v: 5063 case NEON::BI__builtin_neon_vcvtmq_u64_v: { 5064 Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms; 5065 bool Double = 5066 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5067 llvm::Type *InTy = 5068 GetNeonType(this, 5069 NeonTypeFlags(Double ? 
NeonTypeFlags::Float64 5070 : NeonTypeFlags::Float32, false, quad)); 5071 llvm::Type *Tys[2] = { Ty, InTy }; 5072 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm"); 5073 } 5074 case NEON::BI__builtin_neon_vcvtn_s32_v: 5075 case NEON::BI__builtin_neon_vcvtnq_s32_v: 5076 case NEON::BI__builtin_neon_vcvtn_u32_v: 5077 case NEON::BI__builtin_neon_vcvtnq_u32_v: 5078 case NEON::BI__builtin_neon_vcvtn_s64_v: 5079 case NEON::BI__builtin_neon_vcvtnq_s64_v: 5080 case NEON::BI__builtin_neon_vcvtn_u64_v: 5081 case NEON::BI__builtin_neon_vcvtnq_u64_v: { 5082 Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns; 5083 bool Double = 5084 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5085 llvm::Type *InTy = 5086 GetNeonType(this, 5087 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5088 : NeonTypeFlags::Float32, false, quad)); 5089 llvm::Type *Tys[2] = { Ty, InTy }; 5090 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn"); 5091 } 5092 case NEON::BI__builtin_neon_vcvtp_s32_v: 5093 case NEON::BI__builtin_neon_vcvtpq_s32_v: 5094 case NEON::BI__builtin_neon_vcvtp_u32_v: 5095 case NEON::BI__builtin_neon_vcvtpq_u32_v: 5096 case NEON::BI__builtin_neon_vcvtp_s64_v: 5097 case NEON::BI__builtin_neon_vcvtpq_s64_v: 5098 case NEON::BI__builtin_neon_vcvtp_u64_v: 5099 case NEON::BI__builtin_neon_vcvtpq_u64_v: { 5100 Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps; 5101 bool Double = 5102 (cast<llvm::IntegerType>(VTy->getElementType())->getBitWidth() == 64); 5103 llvm::Type *InTy = 5104 GetNeonType(this, 5105 NeonTypeFlags(Double ? NeonTypeFlags::Float64 5106 : NeonTypeFlags::Float32, false, quad)); 5107 llvm::Type *Tys[2] = { Ty, InTy }; 5108 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp"); 5109 } 5110 case NEON::BI__builtin_neon_vmulx_v: 5111 case NEON::BI__builtin_neon_vmulxq_v: { 5112 Int = Intrinsic::aarch64_neon_fmulx; 5113 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx"); 5114 } 5115 case NEON::BI__builtin_neon_vmul_lane_v: 5116 case NEON::BI__builtin_neon_vmul_laneq_v: { 5117 // v1f64 vmul_lane should be mapped to Neon scalar mul lane 5118 bool Quad = false; 5119 if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v) 5120 Quad = true; 5121 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5122 llvm::Type *VTy = GetNeonType(this, 5123 NeonTypeFlags(NeonTypeFlags::Float64, false, Quad)); 5124 Ops[1] = Builder.CreateBitCast(Ops[1], VTy); 5125 Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract"); 5126 Value *Result = Builder.CreateFMul(Ops[0], Ops[1]); 5127 return Builder.CreateBitCast(Result, Ty); 5128 } 5129 case NEON::BI__builtin_neon_vnegd_s64: 5130 return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd"); 5131 case NEON::BI__builtin_neon_vpmaxnm_v: 5132 case NEON::BI__builtin_neon_vpmaxnmq_v: { 5133 Int = Intrinsic::aarch64_neon_fmaxnmp; 5134 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm"); 5135 } 5136 case NEON::BI__builtin_neon_vpminnm_v: 5137 case NEON::BI__builtin_neon_vpminnmq_v: { 5138 Int = Intrinsic::aarch64_neon_fminnmp; 5139 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm"); 5140 } 5141 case NEON::BI__builtin_neon_vsqrt_v: 5142 case NEON::BI__builtin_neon_vsqrtq_v: { 5143 Int = Intrinsic::sqrt; 5144 Ops[0] = Builder.CreateBitCast(Ops[0], Ty); 5145 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt"); 5146 } 5147 case NEON::BI__builtin_neon_vrbit_v: 5148 case NEON::BI__builtin_neon_vrbitq_v: { 5149 Int = 
Intrinsic::aarch64_neon_rbit; 5150 return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit"); 5151 } 5152 case NEON::BI__builtin_neon_vaddv_u8: 5153 // FIXME: These are handled by the AArch64 scalar code. 5154 usgn = true; 5155 // FALLTHROUGH 5156 case NEON::BI__builtin_neon_vaddv_s8: { 5157 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5158 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5159 VTy = 5160 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5161 llvm::Type *Tys[2] = { Ty, VTy }; 5162 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5163 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5164 return Builder.CreateTrunc(Ops[0], 5165 llvm::IntegerType::get(getLLVMContext(), 8)); 5166 } 5167 case NEON::BI__builtin_neon_vaddv_u16: 5168 usgn = true; 5169 // FALLTHROUGH 5170 case NEON::BI__builtin_neon_vaddv_s16: { 5171 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5172 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5173 VTy = 5174 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5175 llvm::Type *Tys[2] = { Ty, VTy }; 5176 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5177 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5178 return Builder.CreateTrunc(Ops[0], 5179 llvm::IntegerType::get(getLLVMContext(), 16)); 5180 } 5181 case NEON::BI__builtin_neon_vaddvq_u8: 5182 usgn = true; 5183 // FALLTHROUGH 5184 case NEON::BI__builtin_neon_vaddvq_s8: { 5185 Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5186 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5187 VTy = 5188 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5189 llvm::Type *Tys[2] = { Ty, VTy }; 5190 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5191 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5192 return Builder.CreateTrunc(Ops[0], 5193 llvm::IntegerType::get(getLLVMContext(), 8)); 5194 } 5195 case NEON::BI__builtin_neon_vaddvq_u16: 5196 usgn = true; 5197 // FALLTHROUGH 5198 case NEON::BI__builtin_neon_vaddvq_s16: { 5199 Int = usgn ? 
Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv; 5200 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5201 VTy = 5202 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5203 llvm::Type *Tys[2] = { Ty, VTy }; 5204 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5205 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv"); 5206 return Builder.CreateTrunc(Ops[0], 5207 llvm::IntegerType::get(getLLVMContext(), 16)); 5208 } 5209 case NEON::BI__builtin_neon_vmaxv_u8: { 5210 Int = Intrinsic::aarch64_neon_umaxv; 5211 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5212 VTy = 5213 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5214 llvm::Type *Tys[2] = { Ty, VTy }; 5215 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5216 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5217 return Builder.CreateTrunc(Ops[0], 5218 llvm::IntegerType::get(getLLVMContext(), 8)); 5219 } 5220 case NEON::BI__builtin_neon_vmaxv_u16: { 5221 Int = Intrinsic::aarch64_neon_umaxv; 5222 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5223 VTy = 5224 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5225 llvm::Type *Tys[2] = { Ty, VTy }; 5226 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5227 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5228 return Builder.CreateTrunc(Ops[0], 5229 llvm::IntegerType::get(getLLVMContext(), 16)); 5230 } 5231 case NEON::BI__builtin_neon_vmaxvq_u8: { 5232 Int = Intrinsic::aarch64_neon_umaxv; 5233 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5234 VTy = 5235 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5236 llvm::Type *Tys[2] = { Ty, VTy }; 5237 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5238 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5239 return Builder.CreateTrunc(Ops[0], 5240 llvm::IntegerType::get(getLLVMContext(), 8)); 5241 } 5242 case NEON::BI__builtin_neon_vmaxvq_u16: { 5243 Int = Intrinsic::aarch64_neon_umaxv; 5244 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5245 VTy = 5246 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5247 llvm::Type *Tys[2] = { Ty, VTy }; 5248 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5249 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5250 return Builder.CreateTrunc(Ops[0], 5251 llvm::IntegerType::get(getLLVMContext(), 16)); 5252 } 5253 case NEON::BI__builtin_neon_vmaxv_s8: { 5254 Int = Intrinsic::aarch64_neon_smaxv; 5255 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5256 VTy = 5257 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5258 llvm::Type *Tys[2] = { Ty, VTy }; 5259 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5260 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5261 return Builder.CreateTrunc(Ops[0], 5262 llvm::IntegerType::get(getLLVMContext(), 8)); 5263 } 5264 case NEON::BI__builtin_neon_vmaxv_s16: { 5265 Int = Intrinsic::aarch64_neon_smaxv; 5266 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5267 VTy = 5268 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5269 llvm::Type *Tys[2] = { Ty, VTy }; 5270 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5271 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5272 return Builder.CreateTrunc(Ops[0], 5273 llvm::IntegerType::get(getLLVMContext(), 16)); 5274 } 5275 case NEON::BI__builtin_neon_vmaxvq_s8: { 5276 Int = Intrinsic::aarch64_neon_smaxv; 5277 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5278 VTy = 
5279 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5280 llvm::Type *Tys[2] = { Ty, VTy }; 5281 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5282 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5283 return Builder.CreateTrunc(Ops[0], 5284 llvm::IntegerType::get(getLLVMContext(), 8)); 5285 } 5286 case NEON::BI__builtin_neon_vmaxvq_s16: { 5287 Int = Intrinsic::aarch64_neon_smaxv; 5288 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5289 VTy = 5290 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5291 llvm::Type *Tys[2] = { Ty, VTy }; 5292 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5293 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv"); 5294 return Builder.CreateTrunc(Ops[0], 5295 llvm::IntegerType::get(getLLVMContext(), 16)); 5296 } 5297 case NEON::BI__builtin_neon_vminv_u8: { 5298 Int = Intrinsic::aarch64_neon_uminv; 5299 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5300 VTy = 5301 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5302 llvm::Type *Tys[2] = { Ty, VTy }; 5303 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5304 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5305 return Builder.CreateTrunc(Ops[0], 5306 llvm::IntegerType::get(getLLVMContext(), 8)); 5307 } 5308 case NEON::BI__builtin_neon_vminv_u16: { 5309 Int = Intrinsic::aarch64_neon_uminv; 5310 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5311 VTy = 5312 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5313 llvm::Type *Tys[2] = { Ty, VTy }; 5314 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5315 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5316 return Builder.CreateTrunc(Ops[0], 5317 llvm::IntegerType::get(getLLVMContext(), 16)); 5318 } 5319 case NEON::BI__builtin_neon_vminvq_u8: { 5320 Int = Intrinsic::aarch64_neon_uminv; 5321 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5322 VTy = 5323 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5324 llvm::Type *Tys[2] = { Ty, VTy }; 5325 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5326 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5327 return Builder.CreateTrunc(Ops[0], 5328 llvm::IntegerType::get(getLLVMContext(), 8)); 5329 } 5330 case NEON::BI__builtin_neon_vminvq_u16: { 5331 Int = Intrinsic::aarch64_neon_uminv; 5332 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5333 VTy = 5334 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5335 llvm::Type *Tys[2] = { Ty, VTy }; 5336 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5337 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5338 return Builder.CreateTrunc(Ops[0], 5339 llvm::IntegerType::get(getLLVMContext(), 16)); 5340 } 5341 case NEON::BI__builtin_neon_vminv_s8: { 5342 Int = Intrinsic::aarch64_neon_sminv; 5343 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5344 VTy = 5345 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5346 llvm::Type *Tys[2] = { Ty, VTy }; 5347 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5348 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5349 return Builder.CreateTrunc(Ops[0], 5350 llvm::IntegerType::get(getLLVMContext(), 8)); 5351 } 5352 case NEON::BI__builtin_neon_vminv_s16: { 5353 Int = Intrinsic::aarch64_neon_sminv; 5354 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5355 VTy = 5356 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5357 llvm::Type *Tys[2] = { Ty, VTy }; 5358 
Ops.push_back(EmitScalarExpr(E->getArg(0))); 5359 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5360 return Builder.CreateTrunc(Ops[0], 5361 llvm::IntegerType::get(getLLVMContext(), 16)); 5362 } 5363 case NEON::BI__builtin_neon_vminvq_s8: { 5364 Int = Intrinsic::aarch64_neon_sminv; 5365 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5366 VTy = 5367 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5368 llvm::Type *Tys[2] = { Ty, VTy }; 5369 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5370 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5371 return Builder.CreateTrunc(Ops[0], 5372 llvm::IntegerType::get(getLLVMContext(), 8)); 5373 } 5374 case NEON::BI__builtin_neon_vminvq_s16: { 5375 Int = Intrinsic::aarch64_neon_sminv; 5376 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5377 VTy = 5378 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5379 llvm::Type *Tys[2] = { Ty, VTy }; 5380 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5381 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv"); 5382 return Builder.CreateTrunc(Ops[0], 5383 llvm::IntegerType::get(getLLVMContext(), 16)); 5384 } 5385 case NEON::BI__builtin_neon_vmul_n_f64: { 5386 Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy); 5387 Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy); 5388 return Builder.CreateFMul(Ops[0], RHS); 5389 } 5390 case NEON::BI__builtin_neon_vaddlv_u8: { 5391 Int = Intrinsic::aarch64_neon_uaddlv; 5392 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5393 VTy = 5394 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5395 llvm::Type *Tys[2] = { Ty, VTy }; 5396 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5397 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5398 return Builder.CreateTrunc(Ops[0], 5399 llvm::IntegerType::get(getLLVMContext(), 16)); 5400 } 5401 case NEON::BI__builtin_neon_vaddlv_u16: { 5402 Int = Intrinsic::aarch64_neon_uaddlv; 5403 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5404 VTy = 5405 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4); 5406 llvm::Type *Tys[2] = { Ty, VTy }; 5407 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5408 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5409 } 5410 case NEON::BI__builtin_neon_vaddlvq_u8: { 5411 Int = Intrinsic::aarch64_neon_uaddlv; 5412 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5413 VTy = 5414 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16); 5415 llvm::Type *Tys[2] = { Ty, VTy }; 5416 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5417 Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5418 return Builder.CreateTrunc(Ops[0], 5419 llvm::IntegerType::get(getLLVMContext(), 16)); 5420 } 5421 case NEON::BI__builtin_neon_vaddlvq_u16: { 5422 Int = Intrinsic::aarch64_neon_uaddlv; 5423 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5424 VTy = 5425 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8); 5426 llvm::Type *Tys[2] = { Ty, VTy }; 5427 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5428 return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv"); 5429 } 5430 case NEON::BI__builtin_neon_vaddlv_s8: { 5431 Int = Intrinsic::aarch64_neon_saddlv; 5432 Ty = llvm::IntegerType::get(getLLVMContext(), 32); 5433 VTy = 5434 llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8); 5435 llvm::Type *Tys[2] = { Ty, VTy }; 5436 Ops.push_back(EmitScalarExpr(E->getArg(0))); 5437 Ops[0] = 
  case NEON::BI__builtin_neon_vaddlv_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0],
        llvm::IntegerType::get(getLLVMContext(), 16));
  }
  case NEON::BI__builtin_neon_vaddlv_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_u8: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0],
        llvm::IntegerType::get(getLLVMContext(), 16));
  }
  case NEON::BI__builtin_neon_vaddlvq_u16: {
    Int = Intrinsic::aarch64_neon_uaddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlv_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0],
        llvm::IntegerType::get(getLLVMContext(), 16));
  }
  case NEON::BI__builtin_neon_vaddlv_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 4);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vaddlvq_s8: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 8), 16);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
    return Builder.CreateTrunc(Ops[0],
        llvm::IntegerType::get(getLLVMContext(), 16));
  }
  case NEON::BI__builtin_neon_vaddlvq_s16: {
    Int = Intrinsic::aarch64_neon_saddlv;
    Ty = llvm::IntegerType::get(getLLVMContext(), 32);
    VTy =
      llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 16), 8);
    llvm::Type *Tys[2] = { Ty, VTy };
    Ops.push_back(EmitScalarExpr(E->getArg(0)));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
  }
  case NEON::BI__builtin_neon_vsri_n_v:
  case NEON::BI__builtin_neon_vsriq_n_v: {
    Int = Intrinsic::aarch64_neon_vsri;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsri_n");
  }
  case NEON::BI__builtin_neon_vsli_n_v:
  case NEON::BI__builtin_neon_vsliq_n_v: {
    Int = Intrinsic::aarch64_neon_vsli;
    llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
    return EmitNeonCall(Intrin, Ops, "vsli_n");
  }
  case NEON::BI__builtin_neon_vsra_n_v:
  case NEON::BI__builtin_neon_vsraq_n_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
    return Builder.CreateAdd(Ops[0], Ops[1]);
  case NEON::BI__builtin_neon_vrsra_n_v:
  case NEON::BI__builtin_neon_vrsraq_n_v: {
    Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
    SmallVector<llvm::Value*,2> TmpOps;
    TmpOps.push_back(Ops[1]);
    TmpOps.push_back(Ops[2]);
    Function* F = CGM.getIntrinsic(Int, Ty);
    llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
    Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
    return Builder.CreateAdd(Ops[0], tmp);
  }
  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
  // of an Align parameter here.
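  // The vld1_xN builtins load N consecutive vectors and return them through
  // an sret-style out pointer in Ops[0]. A minimal sketch of the emitted IR
  // for vld1_x2_v (illustrative types and intrinsic suffix assumed):
  //   %agg = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2(i8* %src)
  //   store { <8 x i8>, <8 x i8> } %agg, { <8 x i8>, <8 x i8> }* %dst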
  case NEON::BI__builtin_neon_vld1_x2_v:
  case NEON::BI__builtin_neon_vld1q_x2_v:
  case NEON::BI__builtin_neon_vld1_x3_v:
  case NEON::BI__builtin_neon_vld1q_x3_v:
  case NEON::BI__builtin_neon_vld1_x4_v:
  case NEON::BI__builtin_neon_vld1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vld1_x2_v:
    case NEON::BI__builtin_neon_vld1q_x2_v:
      Int = Intrinsic::aarch64_neon_ld1x2;
      break;
    case NEON::BI__builtin_neon_vld1_x3_v:
    case NEON::BI__builtin_neon_vld1q_x3_v:
      Int = Intrinsic::aarch64_neon_ld1x3;
      break;
    case NEON::BI__builtin_neon_vld1_x4_v:
    case NEON::BI__builtin_neon_vld1q_x4_v:
      Int = Intrinsic::aarch64_neon_ld1x4;
      break;
    }
    Function *F = CGM.getIntrinsic(Int, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst1_x2_v:
  case NEON::BI__builtin_neon_vst1q_x2_v:
  case NEON::BI__builtin_neon_vst1_x3_v:
  case NEON::BI__builtin_neon_vst1q_x3_v:
  case NEON::BI__builtin_neon_vst1_x4_v:
  case NEON::BI__builtin_neon_vst1q_x4_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
    llvm::Type *Tys[2] = { VTy, PTy };
    unsigned Int;
    switch (BuiltinID) {
    case NEON::BI__builtin_neon_vst1_x2_v:
    case NEON::BI__builtin_neon_vst1q_x2_v:
      Int = Intrinsic::aarch64_neon_st1x2;
      break;
    case NEON::BI__builtin_neon_vst1_x3_v:
    case NEON::BI__builtin_neon_vst1q_x3_v:
      Int = Intrinsic::aarch64_neon_st1x3;
      break;
    case NEON::BI__builtin_neon_vst1_x4_v:
    case NEON::BI__builtin_neon_vst1q_x4_v:
      Int = Intrinsic::aarch64_neon_st1x4;
      break;
    }
    SmallVector<Value *, 4> IntOps(Ops.begin()+1, Ops.end());
    IntOps.push_back(Ops[0]);
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), IntOps, "");
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    return Builder.CreateLoad(Ops[0]);
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  case NEON::BI__builtin_neon_vld1_lane_v:
  case NEON::BI__builtin_neon_vld1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[0] = Builder.CreateLoad(Ops[0]);
    llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vst1_lane_v:
  case NEON::BI__builtin_neon_vst1q_lane_v:
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    return Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty));
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_dup_v:
  case NEON::BI__builtin_neon_vld2q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_dup_v:
  case NEON::BI__builtin_neon_vld3q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_dup_v:
  case NEON::BI__builtin_neon_vld4q_dup_v: {
    llvm::Type *PTy =
      llvm::PointerType::getUnqual(VTy->getElementType());
    Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
    llvm::Type *Tys[2] = { VTy, PTy };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
    Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
    Ops[0] = Builder.CreateBitCast(Ops[0],
                llvm::PointerType::getUnqual(Ops[1]->getType()));
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateZExt(Ops[3],
                llvm::IntegerType::get(getLLVMContext(), 64));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateZExt(Ops[4],
                llvm::IntegerType::get(getLLVMContext(), 64));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
    Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
    Ops.push_back(Ops[1]);
    Ops.erase(Ops.begin()+1);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
    Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
    Ops[5] = Builder.CreateZExt(Ops[5],
                llvm::IntegerType::get(getLLVMContext(), 64));
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[2] = Builder.CreateZExt(Ops[2],
                llvm::IntegerType::get(getLLVMContext(), 64));
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[3] = Builder.CreateZExt(Ops[3],
                llvm::IntegerType::get(getLLVMContext(), 64));
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
                        Ops, "");
  }
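  // vst4/vst4_lane below use the same rotation as the stores above: the
  // destination pointer arrives as the first builtin argument but the
  // intrinsic takes it last. Roughly, vst2_v becomes (illustrative):
  //   call void @llvm.aarch64.neon.st2(<8 x i8> %a, <8 x i8> %b, i8* %ptr)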
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    Ops.push_back(Ops[0]);
    Ops.erase(Ops.begin());
    Ops[4] = Builder.CreateZExt(Ops[4],
                llvm::IntegerType::get(getLLVMContext(), 64));
    llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
                        Ops, "");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, i+vi));
        Indices.push_back(ConstantInt::get(Int32Ty, i+e+vi));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      SmallVector<Constant*, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = llvm::ConstantVector::get(Indices);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
      SV = Builder.CreateStore(SV, Addr);
    }
    return SV;
  }
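  // vqtblN/vqtbxN map directly onto the AArch64 table-lookup intrinsics: tbl
  // writes zero for out-of-range indices, while tbx leaves the corresponding
  // destination byte unchanged.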
  case NEON::BI__builtin_neon_vqtbl1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
                        Ops, "vtbl1");
  }
  case NEON::BI__builtin_neon_vqtbl2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
                        Ops, "vtbl2");
  }
  case NEON::BI__builtin_neon_vqtbl3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
                        Ops, "vtbl3");
  }
  case NEON::BI__builtin_neon_vqtbl4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
                        Ops, "vtbl4");
  }
  case NEON::BI__builtin_neon_vqtbx1q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
                        Ops, "vtbx1");
  }
  case NEON::BI__builtin_neon_vqtbx2q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
                        Ops, "vtbx2");
  }
  case NEON::BI__builtin_neon_vqtbx3q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
                        Ops, "vtbx3");
  }
  case NEON::BI__builtin_neon_vqtbx4q_v: {
    return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
                        Ops, "vtbx4");
  }
  case NEON::BI__builtin_neon_vsqadd_v:
  case NEON::BI__builtin_neon_vsqaddq_v: {
    Int = Intrinsic::aarch64_neon_usqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
  }
  case NEON::BI__builtin_neon_vuqadd_v:
  case NEON::BI__builtin_neon_vuqaddq_v: {
    Int = Intrinsic::aarch64_neon_suqadd;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
  }
  }
}

llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value*> Ops) {
  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
         "Not a power-of-two sized vector!");
  bool AllConstants = true;
  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
    AllConstants &= isa<Constant>(Ops[i]);

  // If this is a constant vector, create a ConstantVector.
  if (AllConstants) {
    SmallVector<llvm::Constant*, 16> CstOps;
    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
      CstOps.push_back(cast<Constant>(Ops[i]));
    return llvm::ConstantVector::get(CstOps);
  }

  // Otherwise, insertelement the values to build the vector.
  Value *Result =
    llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));

  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));

  return Result;
}

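// Usage sketch (illustrative): the MMX vec_init builtins below hand their
// scalar arguments to BuildVector, which produces either a ConstantVector
// (when every argument is a constant) or a chain of insertelement
// instructions, and the result is then bitcast to the x86_mmx type.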
Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  // Find out if any arguments are required to be integer constant expressions.
  unsigned ICEArguments = 0;
  ASTContext::GetBuiltinTypeError Error;
  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
  assert(Error == ASTContext::GE_None && "Should not codegen an error");

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
    // If this is a normal argument, just emit it as a scalar.
    if ((ICEArguments & (1 << i)) == 0) {
      Ops.push_back(EmitScalarExpr(E->getArg(i)));
      continue;
    }

    // If this is required to be a constant, constant fold it so that we know
    // that the generated intrinsic gets a ConstantInt.
    llvm::APSInt Result;
    bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
    assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
    Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
  }

  switch (BuiltinID) {
  default: return nullptr;
  case X86::BI_mm_prefetch: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *RW = ConstantInt::get(Int32Ty, 0);
    Value *Locality = EmitScalarExpr(E->getArg(1));
    Value *Data = ConstantInt::get(Int32Ty, 1);
    Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
    return Builder.CreateCall4(F, Address, RW, Locality, Data);
  }
  case X86::BI__builtin_ia32_vec_init_v8qi:
  case X86::BI__builtin_ia32_vec_init_v4hi:
  case X86::BI__builtin_ia32_vec_init_v2si:
    return Builder.CreateBitCast(BuildVector(Ops),
                                 llvm::Type::getX86_MMXTy(getLLVMContext()));
  case X86::BI__builtin_ia32_vec_ext_v2si:
    return Builder.CreateExtractElement(Ops[0],
                                  llvm::ConstantInt::get(Ops[1]->getType(), 0));
  case X86::BI__builtin_ia32_ldmxcsr: {
    Value *Tmp = CreateMemTemp(E->getArg(0)->getType());
    Builder.CreateStore(Ops[0], Tmp);
    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
                              Builder.CreateBitCast(Tmp, Int8PtrTy));
  }
  case X86::BI__builtin_ia32_stmxcsr: {
    Value *Tmp = CreateMemTemp(E->getType());
    Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
                       Builder.CreateBitCast(Tmp, Int8PtrTy));
    return Builder.CreateLoad(Tmp, "stmxcsr");
  }
  case X86::BI__builtin_ia32_storehps:
  case X86::BI__builtin_ia32_storelps: {
    llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
    llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);

    // Cast val to v2i64.
    Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");

    // Extract element 0 (storelps) or 1 (storehps).
    unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
    llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
    Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");

    // Cast the pointer to i64* and store.
    Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
    return Builder.CreateStore(Ops[1], Ops[0]);
  }
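  // palignr concatenates the two sources and extracts a byte-aligned window.
  // E.g. (illustrative) _mm_alignr_epi8(a, b, 4) yields bytes b[4..15]
  // followed by a[0..3]; the code below expresses that as one shufflevector
  // per 128-bit lane.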
  case X86::BI__builtin_ia32_palignr128:
  case X86::BI__builtin_ia32_palignr256: {
    unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();

    unsigned NumElts =
      cast<llvm::VectorType>(Ops[0]->getType())->getNumElements();
    assert(NumElts % 16 == 0);
    unsigned NumLanes = NumElts / 16;
    unsigned NumLaneElts = NumElts / NumLanes;

    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if (ShiftVal >= (2 * NumLaneElts))
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    if (ShiftVal > NumLaneElts) {
      ShiftVal -= NumLaneElts;
      Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
    }

    SmallVector<llvm::Constant*, 32> Indices;
    // 256-bit palignr operates on 128-bit lanes so we need to handle that.
    for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
      for (unsigned i = 0; i != NumLaneElts; ++i) {
        unsigned Idx = ShiftVal + i;
        if (Idx >= NumLaneElts)
          Idx += NumElts - NumLaneElts; // End of lane, switch operand.
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
      }
    }

    Value* SV = llvm::ConstantVector::get(Indices);
    return Builder.CreateShuffleVector(Ops[1], Ops[0], SV, "palignr");
  }
  case X86::BI__builtin_ia32_pslldqi256: {
    // Shift value is in bits so divide by 8.
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;

    // If pslldq is shifting the vector more than 15 bytes, emit zero.
    if (shiftVal >= 16)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    SmallVector<llvm::Constant*, 32> Indices;
    // 256-bit pslldq operates on 128-bit lanes so we need to handle that.
    for (unsigned l = 0; l != 32; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = 32 + i - shiftVal;
        if (Idx < 32) Idx -= 16; // end of lane, switch operand.
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
      }
    }

    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);

    Value *SV = llvm::ConstantVector::get(Indices);
    SV = Builder.CreateShuffleVector(Zero, Ops[0], SV, "pslldq");
    llvm::Type *ResultType = ConvertType(E->getType());
    return Builder.CreateBitCast(SV, ResultType, "cast");
  }
  case X86::BI__builtin_ia32_psrldqi256: {
    // Shift value is in bits so divide by 8.
    unsigned shiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() >> 3;

    // If psrldq is shifting the vector more than 15 bytes, emit zero.
    if (shiftVal >= 16)
      return llvm::Constant::getNullValue(ConvertType(E->getType()));

    SmallVector<llvm::Constant*, 32> Indices;
    // 256-bit psrldq operates on 128-bit lanes so we need to handle that.
    for (unsigned l = 0; l != 32; l += 16) {
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + shiftVal;
        if (Idx >= 16) Idx += 16; // end of lane, switch operand.
        Indices.push_back(llvm::ConstantInt::get(Int32Ty, Idx + l));
      }
    }

    llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, 32);
    Ops[0] = Builder.CreateBitCast(Ops[0], VecTy, "cast");
    Value *Zero = llvm::Constant::getNullValue(VecTy);

    Value *SV = llvm::ConstantVector::get(Indices);
    SV = Builder.CreateShuffleVector(Ops[0], Zero, SV, "psrldq");
    llvm::Type *ResultType = ConvertType(E->getType());
    return Builder.CreateBitCast(SV, ResultType, "cast");
  }
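  // The movnt* builtins become ordinary stores tagged with !nontemporal
  // metadata, which the backend lowers to streaming stores. Roughly
  // (illustrative):
  //   store <4 x float> %v, <4 x float>* %p, align 16, !nontemporal !0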
  case X86::BI__builtin_ia32_movntps:
  case X86::BI__builtin_ia32_movntps256:
  case X86::BI__builtin_ia32_movntpd:
  case X86::BI__builtin_ia32_movntpd256:
  case X86::BI__builtin_ia32_movntdq:
  case X86::BI__builtin_ia32_movntdq256:
  case X86::BI__builtin_ia32_movnti:
  case X86::BI__builtin_ia32_movnti64: {
    llvm::MDNode *Node = llvm::MDNode::get(
        getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));

    // Convert the type of the pointer to a pointer to the stored type.
    Value *BC = Builder.CreateBitCast(Ops[0],
                                llvm::PointerType::getUnqual(Ops[1]->getType()),
                                      "cast");
    StoreInst *SI = Builder.CreateStore(Ops[1], BC);
    SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);

    // If the operand is an integer, we can't assume alignment. Otherwise,
    // assume natural alignment.
    QualType ArgTy = E->getArg(1)->getType();
    unsigned Align;
    if (ArgTy->isIntegerType())
      Align = 1;
    else
      Align = getContext().getTypeSizeInChars(ArgTy).getQuantity();
    SI->setAlignment(Align);
    return SI;
  }
  // 3DNow!
  case X86::BI__builtin_ia32_pswapdsf:
  case X86::BI__builtin_ia32_pswapdsi: {
    llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
    Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
    llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
    return Builder.CreateCall(F, Ops, "pswapd");
  }
  case X86::BI__builtin_ia32_rdrand16_step:
  case X86::BI__builtin_ia32_rdrand32_step:
  case X86::BI__builtin_ia32_rdrand64_step:
  case X86::BI__builtin_ia32_rdseed16_step:
  case X86::BI__builtin_ia32_rdseed32_step:
  case X86::BI__builtin_ia32_rdseed64_step: {
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_rdrand16_step:
      ID = Intrinsic::x86_rdrand_16;
      break;
    case X86::BI__builtin_ia32_rdrand32_step:
      ID = Intrinsic::x86_rdrand_32;
      break;
    case X86::BI__builtin_ia32_rdrand64_step:
      ID = Intrinsic::x86_rdrand_64;
      break;
    case X86::BI__builtin_ia32_rdseed16_step:
      ID = Intrinsic::x86_rdseed_16;
      break;
    case X86::BI__builtin_ia32_rdseed32_step:
      ID = Intrinsic::x86_rdseed_32;
      break;
    case X86::BI__builtin_ia32_rdseed64_step:
      ID = Intrinsic::x86_rdseed_64;
      break;
    }

    Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
    Builder.CreateStore(Builder.CreateExtractValue(Call, 0), Ops[0]);
    return Builder.CreateExtractValue(Call, 1);
  }
  // SSE comparison intrinsics
  case X86::BI__builtin_ia32_cmpeqps:
  case X86::BI__builtin_ia32_cmpltps:
  case X86::BI__builtin_ia32_cmpleps:
  case X86::BI__builtin_ia32_cmpunordps:
  case X86::BI__builtin_ia32_cmpneqps:
  case X86::BI__builtin_ia32_cmpnltps:
  case X86::BI__builtin_ia32_cmpnleps:
  case X86::BI__builtin_ia32_cmpordps:
  case X86::BI__builtin_ia32_cmpeqss:
  case X86::BI__builtin_ia32_cmpltss:
  case X86::BI__builtin_ia32_cmpless:
  case X86::BI__builtin_ia32_cmpunordss:
  case X86::BI__builtin_ia32_cmpneqss:
  case X86::BI__builtin_ia32_cmpnltss:
  case X86::BI__builtin_ia32_cmpnless:
  case X86::BI__builtin_ia32_cmpordss:
  case X86::BI__builtin_ia32_cmpeqpd:
  case X86::BI__builtin_ia32_cmpltpd:
  case X86::BI__builtin_ia32_cmplepd:
  case X86::BI__builtin_ia32_cmpunordpd:
  case X86::BI__builtin_ia32_cmpneqpd:
  case X86::BI__builtin_ia32_cmpnltpd:
  case X86::BI__builtin_ia32_cmpnlepd:
  case X86::BI__builtin_ia32_cmpordpd:
  case X86::BI__builtin_ia32_cmpeqsd:
  case X86::BI__builtin_ia32_cmpltsd:
  case X86::BI__builtin_ia32_cmplesd:
  case X86::BI__builtin_ia32_cmpunordsd:
  case X86::BI__builtin_ia32_cmpneqsd:
  case X86::BI__builtin_ia32_cmpnltsd:
  case X86::BI__builtin_ia32_cmpnlesd:
  case X86::BI__builtin_ia32_cmpordsd:
    // These exist so that the builtin that takes an immediate can be bounds
    // checked by clang to avoid passing bad immediates to the backend. Since
    // AVX has a larger immediate than SSE we would need separate builtins to
    // do the different bounds checking. Rather than create a clang-specific
    // SSE-only builtin, this implements eight separate builtins to match the
    // gcc implementation.
    // Choose the immediate.
    unsigned Imm;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cmpeqps:
    case X86::BI__builtin_ia32_cmpeqss:
    case X86::BI__builtin_ia32_cmpeqpd:
    case X86::BI__builtin_ia32_cmpeqsd:
      Imm = 0;
      break;
    case X86::BI__builtin_ia32_cmpltps:
    case X86::BI__builtin_ia32_cmpltss:
    case X86::BI__builtin_ia32_cmpltpd:
    case X86::BI__builtin_ia32_cmpltsd:
      Imm = 1;
      break;
    case X86::BI__builtin_ia32_cmpleps:
    case X86::BI__builtin_ia32_cmpless:
    case X86::BI__builtin_ia32_cmplepd:
    case X86::BI__builtin_ia32_cmplesd:
      Imm = 2;
      break;
    case X86::BI__builtin_ia32_cmpunordps:
    case X86::BI__builtin_ia32_cmpunordss:
    case X86::BI__builtin_ia32_cmpunordpd:
    case X86::BI__builtin_ia32_cmpunordsd:
      Imm = 3;
      break;
    case X86::BI__builtin_ia32_cmpneqps:
    case X86::BI__builtin_ia32_cmpneqss:
    case X86::BI__builtin_ia32_cmpneqpd:
    case X86::BI__builtin_ia32_cmpneqsd:
      Imm = 4;
      break;
    case X86::BI__builtin_ia32_cmpnltps:
    case X86::BI__builtin_ia32_cmpnltss:
    case X86::BI__builtin_ia32_cmpnltpd:
    case X86::BI__builtin_ia32_cmpnltsd:
      Imm = 5;
      break;
    case X86::BI__builtin_ia32_cmpnleps:
    case X86::BI__builtin_ia32_cmpnless:
    case X86::BI__builtin_ia32_cmpnlepd:
    case X86::BI__builtin_ia32_cmpnlesd:
      Imm = 6;
      break;
    case X86::BI__builtin_ia32_cmpordps:
    case X86::BI__builtin_ia32_cmpordss:
    case X86::BI__builtin_ia32_cmpordpd:
    case X86::BI__builtin_ia32_cmpordsd:
      Imm = 7;
      break;
    }
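    // E.g. (illustrative) __builtin_ia32_cmpltps(a, b) selects Imm = 1 above
    // and x86_sse_cmp_ps below, so it is emitted as:
    //   call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a,
    //                                         <4 x float> %b, i8 1)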
    // Choose the intrinsic ID.
    const char *name;
    Intrinsic::ID ID;
    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported intrinsic!");
    case X86::BI__builtin_ia32_cmpeqps:
    case X86::BI__builtin_ia32_cmpltps:
    case X86::BI__builtin_ia32_cmpleps:
    case X86::BI__builtin_ia32_cmpunordps:
    case X86::BI__builtin_ia32_cmpneqps:
    case X86::BI__builtin_ia32_cmpnltps:
    case X86::BI__builtin_ia32_cmpnleps:
    case X86::BI__builtin_ia32_cmpordps:
      name = "cmpps";
      ID = Intrinsic::x86_sse_cmp_ps;
      break;
    case X86::BI__builtin_ia32_cmpeqss:
    case X86::BI__builtin_ia32_cmpltss:
    case X86::BI__builtin_ia32_cmpless:
    case X86::BI__builtin_ia32_cmpunordss:
    case X86::BI__builtin_ia32_cmpneqss:
    case X86::BI__builtin_ia32_cmpnltss:
    case X86::BI__builtin_ia32_cmpnless:
    case X86::BI__builtin_ia32_cmpordss:
      name = "cmpss";
      ID = Intrinsic::x86_sse_cmp_ss;
      break;
    case X86::BI__builtin_ia32_cmpeqpd:
    case X86::BI__builtin_ia32_cmpltpd:
    case X86::BI__builtin_ia32_cmplepd:
    case X86::BI__builtin_ia32_cmpunordpd:
    case X86::BI__builtin_ia32_cmpneqpd:
    case X86::BI__builtin_ia32_cmpnltpd:
    case X86::BI__builtin_ia32_cmpnlepd:
    case X86::BI__builtin_ia32_cmpordpd:
      name = "cmppd";
      ID = Intrinsic::x86_sse2_cmp_pd;
      break;
    case X86::BI__builtin_ia32_cmpeqsd:
    case X86::BI__builtin_ia32_cmpltsd:
    case X86::BI__builtin_ia32_cmplesd:
    case X86::BI__builtin_ia32_cmpunordsd:
    case X86::BI__builtin_ia32_cmpneqsd:
    case X86::BI__builtin_ia32_cmpnltsd:
    case X86::BI__builtin_ia32_cmpnlesd:
    case X86::BI__builtin_ia32_cmpordsd:
      name = "cmpsd";
      ID = Intrinsic::x86_sse2_cmp_sd;
      break;
    }

    Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, name);
  }
}

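// For the Altivec/VSX load builtins the effective address is formed by
// indexing the base pointer with the first argument before calling the
// intrinsic. Roughly (illustrative) vec_ld(off, p) becomes:
//   %addr = getelementptr i8, i8* %p, i64 %off
//   %v    = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %addr)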
Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
                                           const CallExpr *E) {
  SmallVector<Value*, 4> Ops;

  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
    Ops.push_back(EmitScalarExpr(E->getArg(i)));

  Intrinsic::ID ID = Intrinsic::not_intrinsic;

  switch (BuiltinID) {
  default: return nullptr;

  // vec_ld, vec_lvsl, vec_lvsr
  case PPC::BI__builtin_altivec_lvx:
  case PPC::BI__builtin_altivec_lvxl:
  case PPC::BI__builtin_altivec_lvebx:
  case PPC::BI__builtin_altivec_lvehx:
  case PPC::BI__builtin_altivec_lvewx:
  case PPC::BI__builtin_altivec_lvsl:
  case PPC::BI__builtin_altivec_lvsr:
  case PPC::BI__builtin_vsx_lxvd2x:
  case PPC::BI__builtin_vsx_lxvw4x:
  {
    Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);

    Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
    case PPC::BI__builtin_altivec_lvx:
      ID = Intrinsic::ppc_altivec_lvx;
      break;
    case PPC::BI__builtin_altivec_lvxl:
      ID = Intrinsic::ppc_altivec_lvxl;
      break;
    case PPC::BI__builtin_altivec_lvebx:
      ID = Intrinsic::ppc_altivec_lvebx;
      break;
    case PPC::BI__builtin_altivec_lvehx:
      ID = Intrinsic::ppc_altivec_lvehx;
      break;
    case PPC::BI__builtin_altivec_lvewx:
      ID = Intrinsic::ppc_altivec_lvewx;
      break;
    case PPC::BI__builtin_altivec_lvsl:
      ID = Intrinsic::ppc_altivec_lvsl;
      break;
    case PPC::BI__builtin_altivec_lvsr:
      ID = Intrinsic::ppc_altivec_lvsr;
      break;
    case PPC::BI__builtin_vsx_lxvd2x:
      ID = Intrinsic::ppc_vsx_lxvd2x;
      break;
    case PPC::BI__builtin_vsx_lxvw4x:
      ID = Intrinsic::ppc_vsx_lxvw4x;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }

  // vec_st
  case PPC::BI__builtin_altivec_stvx:
  case PPC::BI__builtin_altivec_stvxl:
  case PPC::BI__builtin_altivec_stvebx:
  case PPC::BI__builtin_altivec_stvehx:
  case PPC::BI__builtin_altivec_stvewx:
  case PPC::BI__builtin_vsx_stxvd2x:
  case PPC::BI__builtin_vsx_stxvw4x:
  {
    Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
    Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
    Ops.pop_back();

    switch (BuiltinID) {
    default: llvm_unreachable("Unsupported st intrinsic!");
    case PPC::BI__builtin_altivec_stvx:
      ID = Intrinsic::ppc_altivec_stvx;
      break;
    case PPC::BI__builtin_altivec_stvxl:
      ID = Intrinsic::ppc_altivec_stvxl;
      break;
    case PPC::BI__builtin_altivec_stvebx:
      ID = Intrinsic::ppc_altivec_stvebx;
      break;
    case PPC::BI__builtin_altivec_stvehx:
      ID = Intrinsic::ppc_altivec_stvehx;
      break;
    case PPC::BI__builtin_altivec_stvewx:
      ID = Intrinsic::ppc_altivec_stvewx;
      break;
    case PPC::BI__builtin_vsx_stxvd2x:
      ID = Intrinsic::ppc_vsx_stxvd2x;
      break;
    case PPC::BI__builtin_vsx_stxvw4x:
      ID = Intrinsic::ppc_vsx_stxvw4x;
      break;
    }
    llvm::Function *F = CGM.getIntrinsic(ID);
    return Builder.CreateCall(F, Ops, "");
  }
  }
}

// Emit an intrinsic that has 1 float or double operand.
static Value *emitUnaryFPBuiltin(CodeGenFunction &CGF,
                                 const CallExpr *E,
                                 unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall(F, Src0);
}

// Emit an intrinsic that has 3 float or double operands.
static Value *emitTernaryFPBuiltin(CodeGenFunction &CGF,
                                   const CallExpr *E,
                                   unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall3(F, Src0, Src1, Src2);
}

// Emit an intrinsic that has 1 float or double operand, and 1 integer.
static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
                               const CallExpr *E,
                               unsigned IntrinsicID) {
  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));

  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
  return CGF.Builder.CreateCall2(F, Src0, Src1);
}

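// __builtin_amdgpu_div_scale returns its flag through a pointer out-argument
// (arg 3), while the llvm.AMDGPU.div.scale intrinsic returns a {value, i1}
// struct; the first case below extracts both members and stores the
// zero-extended flag through the pointer.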
Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
  switch (BuiltinID) {
  case R600::BI__builtin_amdgpu_div_scale:
  case R600::BI__builtin_amdgpu_div_scalef: {
    // Translate from the intrinsic's struct return to the builtin's out
    // argument.

    std::pair<llvm::Value *, unsigned> FlagOutPtr
      = EmitPointerWithAlignment(E->getArg(3));

    llvm::Value *X = EmitScalarExpr(E->getArg(0));
    llvm::Value *Y = EmitScalarExpr(E->getArg(1));
    llvm::Value *Z = EmitScalarExpr(E->getArg(2));

    llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
                                           X->getType());

    llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z);

    llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
    llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);

    llvm::Type *RealFlagType
      = FlagOutPtr.first->getType()->getPointerElementType();

    llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
    llvm::StoreInst *FlagStore = Builder.CreateStore(FlagExt, FlagOutPtr.first);
    FlagStore->setAlignment(FlagOutPtr.second);
    return Result;
  }
  case R600::BI__builtin_amdgpu_div_fmas:
  case R600::BI__builtin_amdgpu_div_fmasf: {
    llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
    llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
    llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
    llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));

    llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
                                      Src0->getType());
    llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
    return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool);
  }
  case R600::BI__builtin_amdgpu_div_fixup:
  case R600::BI__builtin_amdgpu_div_fixupf:
    return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
  case R600::BI__builtin_amdgpu_trig_preop:
  case R600::BI__builtin_amdgpu_trig_preopf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
  case R600::BI__builtin_amdgpu_rcp:
  case R600::BI__builtin_amdgpu_rcpf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
  case R600::BI__builtin_amdgpu_rsq:
  case R600::BI__builtin_amdgpu_rsqf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
  case R600::BI__builtin_amdgpu_rsq_clamped:
  case R600::BI__builtin_amdgpu_rsq_clampedf:
    return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
  case R600::BI__builtin_amdgpu_ldexp:
  case R600::BI__builtin_amdgpu_ldexpf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
  case R600::BI__builtin_amdgpu_class:
  case R600::BI__builtin_amdgpu_classf:
    return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
  default:
    return nullptr;
  }
}

Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
                                               const CallExpr *E) {
  switch (BuiltinID) {
  case SystemZ::BI__builtin_tbegin: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
    return Builder.CreateCall2(F, TDB, Control);
  }
  case SystemZ::BI__builtin_tbegin_nofloat: {
    Value *TDB = EmitScalarExpr(E->getArg(0));
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
    return Builder.CreateCall2(F, TDB, Control);
  }
  case SystemZ::BI__builtin_tbeginc: {
    Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
    Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
    return Builder.CreateCall2(F, TDB, Control);
  }
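  // The remaining cases map directly onto their s390 intrinsics; note that
  // tabort's abort code is sign-extended to the i64 the intrinsic expects.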
  case SystemZ::BI__builtin_tabort: {
    Value *Data = EmitScalarExpr(E->getArg(0));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
    return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
  }
  case SystemZ::BI__builtin_non_tx_store: {
    Value *Address = EmitScalarExpr(E->getArg(0));
    Value *Data = EmitScalarExpr(E->getArg(1));
    Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
    return Builder.CreateCall2(F, Data, Address);
  }

  default:
    return nullptr;
  }
}