//===--- CGAtomic.cpp - Emit LLVM IR for atomic operations ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the code for emitting atomic operations.
//
//===----------------------------------------------------------------------===//

#include "CodeGenFunction.h"
#include "CGCall.h"
#include "CodeGenModule.h"
#include "clang/AST/ASTContext.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"

using namespace clang;
using namespace CodeGen;

// The ABI values for various atomic memory orderings.
enum AtomicOrderingKind {
  AO_ABI_memory_order_relaxed = 0,
  AO_ABI_memory_order_consume = 1,
  AO_ABI_memory_order_acquire = 2,
  AO_ABI_memory_order_release = 3,
  AO_ABI_memory_order_acq_rel = 4,
  AO_ABI_memory_order_seq_cst = 5
};
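// These values match the C11/C++11 memory_order enumerators as Clang and GCC
// define them; constant ordering arguments are compared against them when
// lowering atomic expressions below.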

namespace {
  class AtomicInfo {
    CodeGenFunction &CGF;
    QualType AtomicTy;
    QualType ValueTy;
    uint64_t AtomicSizeInBits;
    uint64_t ValueSizeInBits;
    CharUnits AtomicAlign;
    CharUnits ValueAlign;
    CharUnits LValueAlign;
    TypeEvaluationKind EvaluationKind;
    bool UseLibcall;
  public:
    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
      assert(lvalue.isSimple());

      AtomicTy = lvalue.getType();
      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
      EvaluationKind = CGF.getEvaluationKind(ValueTy);

      ASTContext &C = CGF.getContext();

      uint64_t valueAlignInBits;
      llvm::tie(ValueSizeInBits, valueAlignInBits) = C.getTypeInfo(ValueTy);

      uint64_t atomicAlignInBits;
      llvm::tie(AtomicSizeInBits, atomicAlignInBits) = C.getTypeInfo(AtomicTy);

      assert(ValueSizeInBits <= AtomicSizeInBits);
      assert(valueAlignInBits <= atomicAlignInBits);

      AtomicAlign = C.toCharUnitsFromBits(atomicAlignInBits);
      ValueAlign = C.toCharUnitsFromBits(valueAlignInBits);
      if (lvalue.getAlignment().isZero())
        lvalue.setAlignment(AtomicAlign);

      UseLibcall =
        (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
         AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
    }

    QualType getAtomicType() const { return AtomicTy; }
    QualType getValueType() const { return ValueTy; }
    CharUnits getAtomicAlignment() const { return AtomicAlign; }
    CharUnits getValueAlignment() const { return ValueAlign; }
    uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
    TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
    bool shouldUseLibcall() const { return UseLibcall; }

    /// Is the atomic size larger than the underlying value type?
    ///
    /// Note that the absence of padding does not mean that atomic
    /// objects are completely interchangeable with non-atomic
    /// objects: we might have promoted the alignment of a type
    /// without making it bigger.
    bool hasPadding() const {
      return (ValueSizeInBits != AtomicSizeInBits);
    }

    bool emitMemSetZeroIfNecessary(LValue dest) const;

    llvm::Value *getAtomicSizeValue() const {
      CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
      return CGF.CGM.getSize(size);
    }

    /// Cast the given pointer to an integer pointer suitable for
    /// atomic operations.
    llvm::Value *emitCastToAtomicIntPointer(llvm::Value *addr) const;

    /// Turn an atomic-layout object into an r-value.
    RValue convertTempToRValue(llvm::Value *addr,
                               AggValueSlot resultSlot) const;

    /// Copy an atomic r-value into atomic-layout memory.
    void emitCopyIntoMemory(RValue rvalue, LValue lvalue) const;

    /// Project an l-value down to the value field.
    LValue projectValue(LValue lvalue) const {
      llvm::Value *addr = lvalue.getAddress();
      if (hasPadding())
        addr = CGF.Builder.CreateStructGEP(addr, 0);

      return LValue::MakeAddr(addr, getValueType(), lvalue.getAlignment(),
                              CGF.getContext(), lvalue.getTBAAInfo());
    }

    /// Materialize an atomic r-value in atomic-layout memory.
    llvm::Value *materializeRValue(RValue rvalue) const;

  private:
    bool requiresMemSetZero(llvm::Type *type) const;
  };
}

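/// Emit a call to one of the __atomic_* runtime library routines, building
/// the function type from the given result type and argument list.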
static RValue emitAtomicLibcall(CodeGenFunction &CGF,
                                StringRef fnName,
                                QualType resultType,
                                CallArgList &args) {
  const CGFunctionInfo &fnInfo =
    CGF.CGM.getTypes().arrangeFreeFunctionCall(resultType, args,
            FunctionType::ExtInfo(), RequiredArgs::All);
  llvm::FunctionType *fnTy = CGF.CGM.getTypes().GetFunctionType(fnInfo);
  llvm::Constant *fn = CGF.CGM.CreateRuntimeFunction(fnTy, fnName);
  return CGF.EmitCall(fnInfo, fn, ReturnValueSlot(), args);
}

/// Does a store of the given IR type modify the full expected width?
static bool isFullSizeType(CodeGenModule &CGM, llvm::Type *type,
                           uint64_t expectedSize) {
  return (CGM.getDataLayout().getTypeStoreSize(type) * 8 == expectedSize);
}

/// Does the atomic type require memsetting to zero before initialization?
///
/// The IR type is provided as a way of making certain queries faster.
bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
  // If the atomic type has size padding, we definitely need a memset.
  if (hasPadding()) return true;

  // Otherwise, do some simple heuristics to try to avoid it:
  switch (getEvaluationKind()) {
  // For scalars and complexes, check whether the store size of the
  // type uses the full size.
  case TEK_Scalar:
    return !isFullSizeType(CGF.CGM, type, AtomicSizeInBits);
  case TEK_Complex:
    return !isFullSizeType(CGF.CGM, type->getStructElementType(0),
                           AtomicSizeInBits / 2);

  // Padding in structs has an undefined bit pattern.  User beware.
  case TEK_Aggregate:
    return false;
  }
  llvm_unreachable("bad evaluation kind");
}

bool AtomicInfo::emitMemSetZeroIfNecessary(LValue dest) const {
  llvm::Value *addr = dest.getAddress();
  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
    return false;

  CGF.Builder.CreateMemSet(addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
                           AtomicSizeInBits / 8,
                           dest.getAlignment().getQuantity());
  return true;
}

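/// Emit the inline lowering of a single atomic operation at the given
/// ordering: a cmpxchg, an atomic load or store, or an atomicrmw (plus a
/// recomputation of the result for the __atomic_*_fetch forms), reading the
/// operands from and writing the result to the given temporaries.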
static void
EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, llvm::Value *Dest,
             llvm::Value *Ptr, llvm::Value *Val1, llvm::Value *Val2,
             uint64_t Size, unsigned Align, llvm::AtomicOrdering Order) {
  llvm::AtomicRMWInst::BinOp Op = llvm::AtomicRMWInst::Add;
  llvm::Instruction::BinaryOps PostOp = (llvm::Instruction::BinaryOps)0;

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
    llvm_unreachable("Already handled!");

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__atomic_compare_exchange:
  case AtomicExpr::AO__atomic_compare_exchange_n: {
    // Note that cmpxchg only supports specifying one ordering and
    // doesn't support weak cmpxchg, at least at the moment.
    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
    LoadVal1->setAlignment(Align);
    llvm::LoadInst *LoadVal2 = CGF.Builder.CreateLoad(Val2);
    LoadVal2->setAlignment(Align);
    llvm::AtomicCmpXchgInst *CXI =
        CGF.Builder.CreateAtomicCmpXchg(Ptr, LoadVal1, LoadVal2, Order);
    CXI->setVolatile(E->isVolatile());
    llvm::StoreInst *StoreVal1 = CGF.Builder.CreateStore(CXI, Val1);
    StoreVal1->setAlignment(Align);
    llvm::Value *Cmp = CGF.Builder.CreateICmpEQ(CXI, LoadVal1);
    CGF.EmitStoreOfScalar(Cmp, CGF.MakeAddrLValue(Dest, E->getType()));
    return;
  }

  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
  case AtomicExpr::AO__atomic_load: {
    llvm::LoadInst *Load = CGF.Builder.CreateLoad(Ptr);
    Load->setAtomic(Order);
    Load->setAlignment(Size);
    Load->setVolatile(E->isVolatile());
    llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Load, Dest);
    StoreDest->setAlignment(Align);
    return;
  }

  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__atomic_store:
  case AtomicExpr::AO__atomic_store_n: {
    assert(!Dest && "Store does not return a value");
    llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
    LoadVal1->setAlignment(Align);
    llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
    Store->setAtomic(Order);
    Store->setAlignment(Size);
    Store->setVolatile(E->isVolatile());
    return;
  }

  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__atomic_exchange:
    Op = llvm::AtomicRMWInst::Xchg;
    break;

  case AtomicExpr::AO__atomic_add_fetch:
    PostOp = llvm::Instruction::Add;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_add:
    Op = llvm::AtomicRMWInst::Add;
    break;

  case AtomicExpr::AO__atomic_sub_fetch:
    PostOp = llvm::Instruction::Sub;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_sub:
  case AtomicExpr::AO__atomic_fetch_sub:
    Op = llvm::AtomicRMWInst::Sub;
    break;

  case AtomicExpr::AO__atomic_and_fetch:
    PostOp = llvm::Instruction::And;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_and:
    Op = llvm::AtomicRMWInst::And;
    break;

  case AtomicExpr::AO__atomic_or_fetch:
    PostOp = llvm::Instruction::Or;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_or:
    Op = llvm::AtomicRMWInst::Or;
    break;

  case AtomicExpr::AO__atomic_xor_fetch:
    PostOp = llvm::Instruction::Xor;
    // Fall through.
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_xor:
    Op = llvm::AtomicRMWInst::Xor;
    break;

  case AtomicExpr::AO__atomic_nand_fetch:
    PostOp = llvm::Instruction::And;
    // Fall through.
  case AtomicExpr::AO__atomic_fetch_nand:
    Op = llvm::AtomicRMWInst::Nand;
    break;
  }

  llvm::LoadInst *LoadVal1 = CGF.Builder.CreateLoad(Val1);
  LoadVal1->setAlignment(Align);
  llvm::AtomicRMWInst *RMWI =
      CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order);
  RMWI->setVolatile(E->isVolatile());

  // For __atomic_*_fetch operations, perform the operation again to
  // determine the value which was written.
  llvm::Value *Result = RMWI;
  if (PostOp)
    Result = CGF.Builder.CreateBinOp(PostOp, RMWI, LoadVal1);
  if (E->getOp() == AtomicExpr::AO__atomic_nand_fetch)
    Result = CGF.Builder.CreateNot(Result);
  llvm::StoreInst *StoreDest = CGF.Builder.CreateStore(Result, Dest);
  StoreDest->setAlignment(Align);
}

// This function emits any expression (scalar, complex, or aggregate)
// into a temporary alloca.
static llvm::Value *
EmitValToTemp(CodeGenFunction &CGF, Expr *E) {
  llvm::Value *DeclPtr = CGF.CreateMemTemp(E->getType(), ".atomictmp");
  CGF.EmitAnyExprToMem(E, DeclPtr, E->getType().getQualifiers(),
                       /*Init*/ true);
  return DeclPtr;
}

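/// Add the value operand for an atomic libcall to the argument list: loaded
/// and passed by value for the optimized, size-suffixed entry points, or as a
/// void* for the generic ones.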
static void
AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                  bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy) {
  if (UseOptimizedLibcall) {
    // Load value and pass it to the function directly.
    unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
    Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy);
    Args.add(RValue::get(Val), ValTy);
  } else {
    // Non-optimized functions always take a reference.
    Args.add(RValue::get(CGF.EmitCastToVoidPtr(Val)),
                         CGF.getContext().VoidPtrTy);
  }
}

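/// Emit an atomic expression. The operation is lowered either to a call into
/// the __atomic_* runtime library (when the object is too large or
/// insufficiently aligned to be handled inline) or to native LLVM atomic
/// instructions, switching over the possible memory orderings at runtime when
/// the ordering argument is not a compile-time constant.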
RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, llvm::Value *Dest) {
  QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
  QualType MemTy = AtomicTy;
  if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
    MemTy = AT->getValueType();
  CharUnits sizeChars = getContext().getTypeSizeInChars(AtomicTy);
  uint64_t Size = sizeChars.getQuantity();
  CharUnits alignChars = getContext().getTypeAlignInChars(AtomicTy);
  unsigned Align = alignChars.getQuantity();
  unsigned MaxInlineWidthInBits =
    getTarget().getMaxAtomicInlineWidth();
  bool UseLibcall = (Size != Align ||
                     getContext().toBits(sizeChars) > MaxInlineWidthInBits);

  llvm::Value *Ptr, *Order, *OrderFail = 0, *Val1 = 0, *Val2 = 0;
  Ptr = EmitScalarExpr(E->getPtr());

  if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
    assert(!Dest && "Init does not return a value");
    LValue lvalue = LValue::MakeAddr(Ptr, AtomicTy, alignChars, getContext());
    EmitAtomicInit(E->getVal1(), lvalue);
    return RValue::get(0);
  }

  Order = EmitScalarExpr(E->getOrder());

  switch (E->getOp()) {
  case AtomicExpr::AO__c11_atomic_init:
    llvm_unreachable("Already handled!");

  case AtomicExpr::AO__c11_atomic_load:
  case AtomicExpr::AO__atomic_load_n:
    break;

  case AtomicExpr::AO__atomic_load:
    Dest = EmitScalarExpr(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_store:
    Val1 = EmitScalarExpr(E->getVal1());
    break;

  case AtomicExpr::AO__atomic_exchange:
    Val1 = EmitScalarExpr(E->getVal1());
    Dest = EmitScalarExpr(E->getVal2());
    break;

  case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
  case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
  case AtomicExpr::AO__atomic_compare_exchange_n:
  case AtomicExpr::AO__atomic_compare_exchange:
    Val1 = EmitScalarExpr(E->getVal1());
    if (E->getOp() == AtomicExpr::AO__atomic_compare_exchange)
      Val2 = EmitScalarExpr(E->getVal2());
    else
      Val2 = EmitValToTemp(*this, E->getVal2());
    OrderFail = EmitScalarExpr(E->getOrderFail());
    // Evaluate and discard the 'weak' argument.
    if (E->getNumSubExprs() == 6)
      EmitScalarExpr(E->getWeak());
    break;

  case AtomicExpr::AO__c11_atomic_fetch_add:
  case AtomicExpr::AO__c11_atomic_fetch_sub:
    if (MemTy->isPointerType()) {
      // For pointer arithmetic, we're required to do a bit of math:
      // adding 1 to an int* is not the same as adding 1 to a uintptr_t.
      // ... but only for the C11 builtins. The GNU builtins expect the
      // user to multiply by sizeof(T).
      QualType Val1Ty = E->getVal1()->getType();
      llvm::Value *Val1Scalar = EmitScalarExpr(E->getVal1());
      CharUnits PointeeIncAmt =
          getContext().getTypeSizeInChars(MemTy->getPointeeType());
      Val1Scalar = Builder.CreateMul(Val1Scalar, CGM.getSize(PointeeIncAmt));
      Val1 = CreateMemTemp(Val1Ty, ".atomictmp");
      EmitStoreOfScalar(Val1Scalar, MakeAddrLValue(Val1, Val1Ty));
      break;
    }
    // Fall through.
  case AtomicExpr::AO__atomic_fetch_add:
  case AtomicExpr::AO__atomic_fetch_sub:
  case AtomicExpr::AO__atomic_add_fetch:
  case AtomicExpr::AO__atomic_sub_fetch:
  case AtomicExpr::AO__c11_atomic_store:
  case AtomicExpr::AO__c11_atomic_exchange:
  case AtomicExpr::AO__atomic_store_n:
  case AtomicExpr::AO__atomic_exchange_n:
  case AtomicExpr::AO__c11_atomic_fetch_and:
  case AtomicExpr::AO__c11_atomic_fetch_or:
  case AtomicExpr::AO__c11_atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_and:
  case AtomicExpr::AO__atomic_fetch_or:
  case AtomicExpr::AO__atomic_fetch_xor:
  case AtomicExpr::AO__atomic_fetch_nand:
  case AtomicExpr::AO__atomic_and_fetch:
  case AtomicExpr::AO__atomic_or_fetch:
  case AtomicExpr::AO__atomic_xor_fetch:
  case AtomicExpr::AO__atomic_nand_fetch:
    Val1 = EmitValToTemp(*this, E->getVal1());
    break;
  }

  if (!E->getType()->isVoidType() && !Dest)
    Dest = CreateMemTemp(E->getType(), ".atomicdst");

  // Use a library call.  See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
  if (UseLibcall) {
    bool UseOptimizedLibcall = false;
    switch (E->getOp()) {
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__atomic_fetch_xor:
      // For these, only library calls for certain sizes exist.
      UseOptimizedLibcall = true;
      break;
    default:
      // Only use optimized library calls for sizes for which they exist.
      if (Size == 1 || Size == 2 || Size == 4 || Size == 8)
        UseOptimizedLibcall = true;
      break;
    }
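    // For example, a 4-byte fetch_add can call __atomic_fetch_add_4, while a
    // 16-byte exchange gets no sized entry point here and falls back to the
    // generic __atomic_exchange, which takes the size as its first argument.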

    CallArgList Args;
    if (!UseOptimizedLibcall) {
      // For non-optimized library calls, the size is the first parameter
      Args.add(RValue::get(llvm::ConstantInt::get(SizeTy, Size)),
               getContext().getSizeType());
    }
    // Atomic address is the first or second parameter
    Args.add(RValue::get(EmitCastToVoidPtr(Ptr)),
             getContext().VoidPtrTy);

    std::string LibCallName;
    QualType RetTy;
    bool HaveRetTy = false;
    switch (E->getOp()) {
    // There is only one libcall for compare and exchange, because there is no
    // optimisation benefit possible from a libcall version of a weak compare
    // and exchange.
    // bool __atomic_compare_exchange(size_t size, void *mem, void *expected,
    //                                void *desired, int success, int failure)
    // bool __atomic_compare_exchange_N(T *mem, T *expected, T desired,
    //                                  int success, int failure)
    case AtomicExpr::AO__c11_atomic_compare_exchange_weak:
    case AtomicExpr::AO__c11_atomic_compare_exchange_strong:
    case AtomicExpr::AO__atomic_compare_exchange:
    case AtomicExpr::AO__atomic_compare_exchange_n:
      LibCallName = "__atomic_compare_exchange";
      RetTy = getContext().BoolTy;
      HaveRetTy = true;
      Args.add(RValue::get(EmitCastToVoidPtr(Val1)),
               getContext().VoidPtrTy);
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy);
      Args.add(RValue::get(Order),
               getContext().IntTy);
      Order = OrderFail;
      break;
    // void __atomic_exchange(size_t size, void *mem, void *val, void *return,
    //                        int order)
    // T __atomic_exchange_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_exchange:
    case AtomicExpr::AO__atomic_exchange_n:
    case AtomicExpr::AO__atomic_exchange:
      LibCallName = "__atomic_exchange";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // void __atomic_store(size_t size, void *mem, void *val, int order)
    // void __atomic_store_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_store:
    case AtomicExpr::AO__atomic_store:
    case AtomicExpr::AO__atomic_store_n:
      LibCallName = "__atomic_store";
      RetTy = getContext().VoidTy;
      HaveRetTy = true;
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // void __atomic_load(size_t size, void *mem, void *return, int order)
    // T __atomic_load_N(T *mem, int order)
    case AtomicExpr::AO__c11_atomic_load:
    case AtomicExpr::AO__atomic_load:
    case AtomicExpr::AO__atomic_load_n:
      LibCallName = "__atomic_load";
      break;
    // T __atomic_fetch_add_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_add:
    case AtomicExpr::AO__atomic_fetch_add:
      LibCallName = "__atomic_fetch_add";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // T __atomic_fetch_and_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_and:
    case AtomicExpr::AO__atomic_fetch_and:
      LibCallName = "__atomic_fetch_and";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // T __atomic_fetch_or_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_or:
    case AtomicExpr::AO__atomic_fetch_or:
      LibCallName = "__atomic_fetch_or";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // T __atomic_fetch_sub_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_sub:
    case AtomicExpr::AO__atomic_fetch_sub:
      LibCallName = "__atomic_fetch_sub";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    // T __atomic_fetch_xor_N(T *mem, T val, int order)
    case AtomicExpr::AO__c11_atomic_fetch_xor:
    case AtomicExpr::AO__atomic_fetch_xor:
      LibCallName = "__atomic_fetch_xor";
      AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy);
      break;
    default: return EmitUnsupportedRValue(E, "atomic library call");
    }

    // Optimized functions have the size in their name.
    if (UseOptimizedLibcall)
      LibCallName += "_" + llvm::utostr(Size);
    // By default, assume we return a value of the atomic type.
    if (!HaveRetTy) {
      if (UseOptimizedLibcall) {
        // Value is returned directly.
        RetTy = MemTy;
      } else {
        // Value is returned through parameter before the order.
        RetTy = getContext().VoidTy;
        Args.add(RValue::get(EmitCastToVoidPtr(Dest)),
                 getContext().VoidPtrTy);
      }
    }
    // order is always the last parameter
    Args.add(RValue::get(Order),
             getContext().IntTy);

    const CGFunctionInfo &FuncInfo =
        CGM.getTypes().arrangeFreeFunctionCall(RetTy, Args,
            FunctionType::ExtInfo(), RequiredArgs::All);
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
    llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
    RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
    if (!RetTy->isVoidType())
      return Res;
    if (E->getType()->isVoidType())
      return RValue::get(0);
    return convertTempToRValue(Dest, E->getType());
  }

  bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store ||
                 E->getOp() == AtomicExpr::AO__atomic_store_n;
  bool IsLoad = E->getOp() == AtomicExpr::AO__c11_atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load ||
                E->getOp() == AtomicExpr::AO__atomic_load_n;

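  // Inline path: cast the address operands to pointers to an integer of the
  // access width so the atomic instructions operate on a plain integer type.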
  llvm::Type *IPtrTy =
      llvm::IntegerType::get(getLLVMContext(), Size * 8)->getPointerTo();
  llvm::Value *OrigDest = Dest;
  Ptr = Builder.CreateBitCast(Ptr, IPtrTy);
  if (Val1) Val1 = Builder.CreateBitCast(Val1, IPtrTy);
  if (Val2) Val2 = Builder.CreateBitCast(Val2, IPtrTy);
  if (Dest && !E->isCmpXChg()) Dest = Builder.CreateBitCast(Dest, IPtrTy);

  if (isa<llvm::ConstantInt>(Order)) {
    int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
    switch (ord) {
    case AO_ABI_memory_order_relaxed:
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Monotonic);
      break;
    case AO_ABI_memory_order_consume:
    case AO_ABI_memory_order_acquire:
      if (IsStore)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Acquire);
      break;
    case AO_ABI_memory_order_release:
      if (IsLoad)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::Release);
      break;
    case AO_ABI_memory_order_acq_rel:
      if (IsLoad || IsStore)
        break; // Avoid crashing on code with undefined behavior
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::AcquireRelease);
      break;
    case AO_ABI_memory_order_seq_cst:
      EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                   llvm::SequentiallyConsistent);
      break;
    default: // invalid order
      // We should not ever get here normally, but it's hard to
      // enforce that in general.
      break;
    }
    if (E->getType()->isVoidType())
      return RValue::get(0);
    return convertTempToRValue(OrigDest, E->getType());
  }

  // Long case, when Order isn't obviously constant.

  // Create all the relevant BB's
  llvm::BasicBlock *MonotonicBB = 0, *AcquireBB = 0, *ReleaseBB = 0,
                   *AcqRelBB = 0, *SeqCstBB = 0;
  MonotonicBB = createBasicBlock("monotonic", CurFn);
  if (!IsStore)
    AcquireBB = createBasicBlock("acquire", CurFn);
  if (!IsLoad)
    ReleaseBB = createBasicBlock("release", CurFn);
  if (!IsLoad && !IsStore)
    AcqRelBB = createBasicBlock("acqrel", CurFn);
  SeqCstBB = createBasicBlock("seqcst", CurFn);
  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);

  // Create the switch for the split
  // MonotonicBB is arbitrarily chosen as the default case; in practice, this
  // doesn't matter unless someone is crazy enough to use something that
  // doesn't fold to a constant for the ordering.
  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, MonotonicBB);

  // Emit all the different atomics
  Builder.SetInsertPoint(MonotonicBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
               llvm::Monotonic);
  Builder.CreateBr(ContBB);
  if (!IsStore) {
    Builder.SetInsertPoint(AcquireBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::Acquire);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(1), AcquireBB);
    SI->addCase(Builder.getInt32(2), AcquireBB);
  }
  if (!IsLoad) {
    Builder.SetInsertPoint(ReleaseBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::Release);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(3), ReleaseBB);
  }
  if (!IsLoad && !IsStore) {
    Builder.SetInsertPoint(AcqRelBB);
    EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
                 llvm::AcquireRelease);
    Builder.CreateBr(ContBB);
    SI->addCase(Builder.getInt32(4), AcqRelBB);
  }
  Builder.SetInsertPoint(SeqCstBB);
  EmitAtomicOp(*this, E, Dest, Ptr, Val1, Val2, Size, Align,
               llvm::SequentiallyConsistent);
  Builder.CreateBr(ContBB);
  SI->addCase(Builder.getInt32(5), SeqCstBB);

  // Cleanup and return
  Builder.SetInsertPoint(ContBB);
  if (E->getType()->isVoidType())
    return RValue::get(0);
  return convertTempToRValue(OrigDest, E->getType());
}

llvm::Value *AtomicInfo::emitCastToAtomicIntPointer(llvm::Value *addr) const {
  unsigned addrspace =
    cast<llvm::PointerType>(addr->getType())->getAddressSpace();
  llvm::IntegerType *ty =
    llvm::IntegerType::get(CGF.getLLVMContext(), AtomicSizeInBits);
  return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
}

RValue AtomicInfo::convertTempToRValue(llvm::Value *addr,
                                       AggValueSlot resultSlot) const {
  if (EvaluationKind == TEK_Aggregate)
    return resultSlot.asRValue();

  // Drill into the padding structure if we have one.
  if (hasPadding())
    addr = CGF.Builder.CreateStructGEP(addr, 0);

  // Otherwise, just convert the temporary to an r-value using the
  // normal conversion routine.
  return CGF.convertTempToRValue(addr, getValueType());
}

/// Emit a load from an l-value of atomic type.  Note that the r-value
/// we produce is an r-value of the atomic *value* type.
RValue CodeGenFunction::EmitAtomicLoad(LValue src, AggValueSlot resultSlot) {
  AtomicInfo atomics(*this, src);

  // Check whether we should use a library call.
  if (atomics.shouldUseLibcall()) {
    llvm::Value *tempAddr;
    if (!resultSlot.isIgnored()) {
      assert(atomics.getEvaluationKind() == TEK_Aggregate);
      tempAddr = resultSlot.getAddr();
    } else {
      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
    }

    // void __atomic_load(size_t size, void *mem, void *return, int order);
    CallArgList args;
    args.add(RValue::get(atomics.getAtomicSizeValue()),
             getContext().getSizeType());
    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
             getContext().VoidPtrTy);
    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
             getContext().VoidPtrTy);
    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
                                                AO_ABI_memory_order_seq_cst)),
             getContext().IntTy);
    emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);

    // Produce the r-value.
    return atomics.convertTempToRValue(tempAddr, resultSlot);
  }

  // Okay, we're doing this natively.
  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(src.getAddress());
  llvm::LoadInst *load = Builder.CreateLoad(addr, "atomic-load");
  load->setAtomic(llvm::SequentiallyConsistent);

  // Other decoration.
  load->setAlignment(src.getAlignment().getQuantity());
  if (src.isVolatileQualified())
    load->setVolatile(true);
  if (src.getTBAAInfo())
    CGM.DecorateInstruction(load, src.getTBAAInfo());

  // Okay, turn that back into the original value type.
  QualType valueType = atomics.getValueType();
  llvm::Value *result = load;

  // If we're ignoring an aggregate return, don't do anything.
  if (atomics.getEvaluationKind() == TEK_Aggregate && resultSlot.isIgnored())
    return RValue::getAggregate(0, false);

  // The easiest way to do this is to go through memory, but we
  // try not to in some easy cases.
  if (atomics.getEvaluationKind() == TEK_Scalar && !atomics.hasPadding()) {
    llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
    if (isa<llvm::IntegerType>(resultTy)) {
      assert(result->getType() == resultTy);
      result = EmitFromMemory(result, valueType);
    } else if (isa<llvm::PointerType>(resultTy)) {
      result = Builder.CreateIntToPtr(result, resultTy);
    } else {
      result = Builder.CreateBitCast(result, resultTy);
    }
    return RValue::get(result);
  }

  // Create a temporary.  This needs to be big enough to hold the
  // atomic integer.
  llvm::Value *temp;
  bool tempIsVolatile = false;
  CharUnits tempAlignment;
  if (atomics.getEvaluationKind() == TEK_Aggregate) {
    assert(!resultSlot.isIgnored());
    temp = resultSlot.getAddr();
    tempAlignment = atomics.getValueAlignment();
    tempIsVolatile = resultSlot.isVolatile();
  } else {
    temp = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
    tempAlignment = atomics.getAtomicAlignment();
  }

  // Slam the integer into the temporary.
  llvm::Value *castTemp = atomics.emitCastToAtomicIntPointer(temp);
  Builder.CreateAlignedStore(result, castTemp, tempAlignment.getQuantity())
    ->setVolatile(tempIsVolatile);

  return atomics.convertTempToRValue(temp, resultSlot);
}



/// Copy an r-value into memory as part of storing to an atomic type.
/// This needs to create a bit-pattern suitable for atomic operations.
void AtomicInfo::emitCopyIntoMemory(RValue rvalue, LValue dest) const {
  // If we have an r-value, the rvalue should be of the atomic type,
  // which means that the caller is responsible for having zeroed
  // any padding.  Just do an aggregate copy of that type.
  if (rvalue.isAggregate()) {
    CGF.EmitAggregateCopy(dest.getAddress(),
                          rvalue.getAggregateAddr(),
                          getAtomicType(),
                          (rvalue.isVolatileQualified()
                           || dest.isVolatileQualified()),
                          dest.getAlignment());
    return;
  }

  // Okay, otherwise we're copying stuff.

  // Zero out the buffer if necessary.
  emitMemSetZeroIfNecessary(dest);

  // Drill past the padding if present.
  dest = projectValue(dest);

  // Okay, store the rvalue in.
  if (rvalue.isScalar()) {
    CGF.EmitStoreOfScalar(rvalue.getScalarVal(), dest, /*init*/ true);
  } else {
    CGF.EmitStoreOfComplex(rvalue.getComplexVal(), dest, /*init*/ true);
  }
}


/// Materialize an r-value into memory for the purposes of storing it
/// to an atomic type.
llvm::Value *AtomicInfo::materializeRValue(RValue rvalue) const {
  // Aggregate r-values are already in memory, and EmitAtomicStore
  // requires them to be values of the atomic type.
  if (rvalue.isAggregate())
    return rvalue.getAggregateAddr();

  // Otherwise, make a temporary and materialize into it.
  llvm::Value *temp = CGF.CreateMemTemp(getAtomicType(), "atomic-store-temp");
  LValue tempLV = CGF.MakeAddrLValue(temp, getAtomicType(), getAtomicAlignment());
  emitCopyIntoMemory(rvalue, tempLV);
  return temp;
}

/// Emit a store to an l-value of atomic type.
///
/// Note that the r-value is expected to be an r-value *of the atomic
/// type*; this means that for aggregate r-values, it should include
/// storage for any padding that was necessary.
void CodeGenFunction::EmitAtomicStore(RValue rvalue, LValue dest,
                                      bool isInit) {
  // If this is an aggregate r-value, it should agree in type except
  // maybe for address-space qualification.
  assert(!rvalue.isAggregate() ||
         rvalue.getAggregateAddr()->getType()->getPointerElementType()
           == dest.getAddress()->getType()->getPointerElementType());

  AtomicInfo atomics(*this, dest);

  // If this is an initialization, just put the value there normally.
  if (isInit) {
    atomics.emitCopyIntoMemory(rvalue, dest);
    return;
  }

  // Check whether we should use a library call.
  if (atomics.shouldUseLibcall()) {
    // Produce a source address.
    llvm::Value *srcAddr = atomics.materializeRValue(rvalue);

    // void __atomic_store(size_t size, void *mem, void *val, int order)
    CallArgList args;
    args.add(RValue::get(atomics.getAtomicSizeValue()),
             getContext().getSizeType());
    args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
             getContext().VoidPtrTy);
    args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
             getContext().VoidPtrTy);
    args.add(RValue::get(llvm::ConstantInt::get(IntTy,
                                                AO_ABI_memory_order_seq_cst)),
             getContext().IntTy);
    emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
    return;
  }

  // Okay, we're doing this natively.
  llvm::Value *intValue;

  // If we've got a scalar value of the right size, try to avoid going
  // through memory.
  if (rvalue.isScalar() && !atomics.hasPadding()) {
    llvm::Value *value = rvalue.getScalarVal();
    if (isa<llvm::IntegerType>(value->getType())) {
      intValue = value;
    } else {
      llvm::IntegerType *inputIntTy =
        llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
      if (isa<llvm::PointerType>(value->getType())) {
        intValue = Builder.CreatePtrToInt(value, inputIntTy);
      } else {
        intValue = Builder.CreateBitCast(value, inputIntTy);
      }
    }

  // Otherwise, we need to go through memory.
  } else {
    // Put the r-value in memory.
    llvm::Value *addr = atomics.materializeRValue(rvalue);

    // Cast the temporary to the atomic int type and pull a value out.
    addr = atomics.emitCastToAtomicIntPointer(addr);
    intValue = Builder.CreateAlignedLoad(addr,
                                 atomics.getAtomicAlignment().getQuantity());
  }

  // Do the atomic store.
  llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
  llvm::StoreInst *store = Builder.CreateStore(intValue, addr);

  // Initializations don't need to be atomic.
  if (!isInit) store->setAtomic(llvm::SequentiallyConsistent);

  // Other decoration.
  store->setAlignment(dest.getAlignment().getQuantity());
  if (dest.isVolatileQualified())
    store->setVolatile(true);
  if (dest.getTBAAInfo())
    CGM.DecorateInstruction(store, dest.getTBAAInfo());
}

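/// Emit the initialization of an l-value of atomic type from the given
/// expression; the initialization itself does not need to be atomic.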
void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
  AtomicInfo atomics(*this, dest);

  switch (atomics.getEvaluationKind()) {
  case TEK_Scalar: {
    llvm::Value *value = EmitScalarExpr(init);
    atomics.emitCopyIntoMemory(RValue::get(value), dest);
    return;
  }

  case TEK_Complex: {
    ComplexPairTy value = EmitComplexExpr(init);
    atomics.emitCopyIntoMemory(RValue::getComplex(value), dest);
    return;
  }

  case TEK_Aggregate: {
    // Fix up the destination if the initializer isn't an expression
    // of atomic type.
    bool Zeroed = false;
    if (!init->getType()->isAtomicType()) {
      Zeroed = atomics.emitMemSetZeroIfNecessary(dest);
      dest = atomics.projectValue(dest);
    }

    // Evaluate the expression directly into the destination.
    AggValueSlot slot = AggValueSlot::forLValue(dest,
                                        AggValueSlot::IsNotDestructed,
                                        AggValueSlot::DoesNotNeedGCBarriers,
                                        AggValueSlot::IsNotAliased,
                                        Zeroed ? AggValueSlot::IsZeroed :
                                                 AggValueSlot::IsNotZeroed);

    EmitAggExpr(init, slot);
    return;
  }
  }
  llvm_unreachable("bad evaluation kind");
}
    996