Home | History | Annotate | Download | only in SelectionDAG
      1 //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #define DEBUG_TYPE "isel"
     15 #include "SDNodeDbgValue.h"
     16 #include "SelectionDAGBuilder.h"
     17 #include "llvm/ADT/BitVector.h"
     18 #include "llvm/ADT/PostOrderIterator.h"
     19 #include "llvm/ADT/SmallSet.h"
     20 #include "llvm/Analysis/AliasAnalysis.h"
     21 #include "llvm/Analysis/ConstantFolding.h"
     22 #include "llvm/Constants.h"
     23 #include "llvm/CallingConv.h"
     24 #include "llvm/DerivedTypes.h"
     25 #include "llvm/Function.h"
     26 #include "llvm/GlobalVariable.h"
     27 #include "llvm/InlineAsm.h"
     28 #include "llvm/Instructions.h"
     29 #include "llvm/Intrinsics.h"
     30 #include "llvm/IntrinsicInst.h"
     31 #include "llvm/LLVMContext.h"
     32 #include "llvm/Module.h"
     33 #include "llvm/CodeGen/Analysis.h"
     34 #include "llvm/CodeGen/FastISel.h"
     35 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     36 #include "llvm/CodeGen/GCStrategy.h"
     37 #include "llvm/CodeGen/GCMetadata.h"
     38 #include "llvm/CodeGen/MachineFunction.h"
     39 #include "llvm/CodeGen/MachineFrameInfo.h"
     40 #include "llvm/CodeGen/MachineInstrBuilder.h"
     41 #include "llvm/CodeGen/MachineJumpTableInfo.h"
     42 #include "llvm/CodeGen/MachineModuleInfo.h"
     43 #include "llvm/CodeGen/MachineRegisterInfo.h"
     44 #include "llvm/CodeGen/SelectionDAG.h"
     45 #include "llvm/Analysis/DebugInfo.h"
     46 #include "llvm/Target/TargetData.h"
     47 #include "llvm/Target/TargetFrameLowering.h"
     48 #include "llvm/Target/TargetInstrInfo.h"
     49 #include "llvm/Target/TargetIntrinsicInfo.h"
     50 #include "llvm/Target/TargetLibraryInfo.h"
     51 #include "llvm/Target/TargetLowering.h"
     52 #include "llvm/Target/TargetOptions.h"
     53 #include "llvm/Support/CommandLine.h"
     54 #include "llvm/Support/Debug.h"
     55 #include "llvm/Support/ErrorHandling.h"
     56 #include "llvm/Support/MathExtras.h"
     57 #include "llvm/Support/raw_ostream.h"
     58 #include <algorithm>
     59 using namespace llvm;
     60 
     61 /// LimitFloatPrecision - Generate low-precision inline sequences for
     62 /// some float libcalls (6, 8 or 12 bits).
     63 static unsigned LimitFloatPrecision;
     64 
     65 static cl::opt<unsigned, true>
     66 LimitFPPrecision("limit-float-precision",
     67                  cl::desc("Generate low-precision inline sequences "
     68                           "for some float libcalls"),
     69                  cl::location(LimitFloatPrecision),
     70                  cl::init(0));
     71 
     // Upper bound on the width of DAG chains.  Bounding the width matters in
     // general because it keeps DAG-based analyses from blowing up: alias
     // analysis and load clustering, for example, may not complete in reasonable
     // time.  It is difficult to recognize and avoid this situation within each
     // individual analysis, and future analyses are likely to behave the same
     // way, so limiting DAG width is the safe approach.  It will be especially
     // important with global DAGs.
     //
     // The default is arbitrarily high so that optimization is not affected, but
     // it could be lowered to improve compile time.  Any ld-ld-st-st sequence
     // over this limit should have been converted to llvm.memcpy by the
     // frontend.  It is easy to induce this behavior with .ll code such as:
     //   %buffer = alloca [4096 x i8]
     //   %data = load [4096 x i8]* %argPtr
     //   store [4096 x i8] %data, [4096 x i8]* %buffer
     static const unsigned MaxParallelChains = 64;
     87 
     88 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
     89                                       const SDValue *Parts, unsigned NumParts,
     90                                       EVT PartVT, EVT ValueVT);
     91 
     92 /// getCopyFromParts - Create a value that contains the specified legal parts
     93 /// combined into the value they represent.  If the parts combine to a type
     94 /// larger then ValueVT then AssertOp can be used to specify whether the extra
     95 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
     96 /// (ISD::AssertSext).
     97 static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
     98                                 const SDValue *Parts,
     99                                 unsigned NumParts, EVT PartVT, EVT ValueVT,
    100                                 ISD::NodeType AssertOp = ISD::DELETED_NODE) {
    101   if (ValueVT.isVector())
    102     return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
    103 
    104   assert(NumParts > 0 && "No parts to assemble!");
    105   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    106   SDValue Val = Parts[0];
    107 
    108   if (NumParts > 1) {
    109     // Assemble the value from multiple parts.
    110     if (ValueVT.isInteger()) {
    111       unsigned PartBits = PartVT.getSizeInBits();
    112       unsigned ValueBits = ValueVT.getSizeInBits();
    113 
    114       // Assemble the power of 2 part.
    115       unsigned RoundParts = NumParts & (NumParts - 1) ?
    116         1 << Log2_32(NumParts) : NumParts;
    117       unsigned RoundBits = PartBits * RoundParts;
    118       EVT RoundVT = RoundBits == ValueBits ?
    119         ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
    120       SDValue Lo, Hi;
    121 
    122       EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
    123 
    124       if (RoundParts > 2) {
    125         Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
    126                               PartVT, HalfVT);
    127         Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
    128                               RoundParts / 2, PartVT, HalfVT);
    129       } else {
    130         Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
    131         Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
    132       }
    133 
    134       if (TLI.isBigEndian())
    135         std::swap(Lo, Hi);
    136 
    137       Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
    138 
    139       if (RoundParts < NumParts) {
    140         // Assemble the trailing non-power-of-2 part.
    141         unsigned OddParts = NumParts - RoundParts;
    142         EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
    143         Hi = getCopyFromParts(DAG, DL,
    144                               Parts + RoundParts, OddParts, PartVT, OddVT);
    145 
    146         // Combine the round and odd parts.
    147         Lo = Val;
    148         if (TLI.isBigEndian())
    149           std::swap(Lo, Hi);
    150         EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    151         Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
    152         Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
    153                          DAG.getConstant(Lo.getValueType().getSizeInBits(),
    154                                          TLI.getPointerTy()));
    155         Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
    156         Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
    157       }
    158     } else if (PartVT.isFloatingPoint()) {
    159       // FP split into multiple FP parts (for ppcf128)
    160       assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
    161              "Unexpected split");
    162       SDValue Lo, Hi;
    163       Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
    164       Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
    165       if (TLI.isBigEndian())
    166         std::swap(Lo, Hi);
    167       Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    168     } else {
    169       // FP split into integer parts (soft fp)
    170       assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
    171              !PartVT.isVector() && "Unexpected split");
    172       EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    173       Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
    174     }
    175   }
    176 
    177   // There is now one part, held in Val.  Correct it to match ValueVT.
    178   PartVT = Val.getValueType();
    179 
    180   if (PartVT == ValueVT)
    181     return Val;
    182 
    183   if (PartVT.isInteger() && ValueVT.isInteger()) {
    184     if (ValueVT.bitsLT(PartVT)) {
    185       // For a truncate, see if we have any information to
    186       // indicate whether the truncated bits will always be
    187       // zero or sign-extension.
    188       if (AssertOp != ISD::DELETED_NODE)
    189         Val = DAG.getNode(AssertOp, DL, PartVT, Val,
    190                           DAG.getValueType(ValueVT));
    191       return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    192     }
    193     return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
    194   }
    195 
    196   if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    197     // FP_ROUND's are always exact here.
    198     if (ValueVT.bitsLT(Val.getValueType()))
    199       return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
    200                          DAG.getTargetConstant(1, TLI.getPointerTy()));
    201 
    202     return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
    203   }
    204 
    205   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
    206     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    207 
    208   llvm_unreachable("Unknown mismatch!");
    209 }
    210 
    211 /// getCopyFromParts - Create a value that contains the specified legal parts
    212 /// combined into the value they represent.  If the parts combine to a type
    213 /// larger then ValueVT then AssertOp can be used to specify whether the extra
    214 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
    215 /// (ISD::AssertSext).
    216 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
    217                                       const SDValue *Parts, unsigned NumParts,
    218                                       EVT PartVT, EVT ValueVT) {
    219   assert(ValueVT.isVector() && "Not a vector value");
    220   assert(NumParts > 0 && "No parts to assemble!");
    221   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    222   SDValue Val = Parts[0];
    223 
    224   // Handle a multi-element vector.
    225   if (NumParts > 1) {
    226     EVT IntermediateVT, RegisterVT;
    227     unsigned NumIntermediates;
    228     unsigned NumRegs =
    229     TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
    230                                NumIntermediates, RegisterVT);
    231     assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    232     NumParts = NumRegs; // Silence a compiler warning.
    233     assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    234     assert(RegisterVT == Parts[0].getValueType() &&
    235            "Part type doesn't match part!");
    236 
    237     // Assemble the parts into intermediate operands.
    238     SmallVector<SDValue, 8> Ops(NumIntermediates);
    239     if (NumIntermediates == NumParts) {
    240       // If the register was not expanded, truncate or copy the value,
    241       // as appropriate.
    242       for (unsigned i = 0; i != NumParts; ++i)
    243         Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
    244                                   PartVT, IntermediateVT);
    245     } else if (NumParts > 0) {
    246       // If the intermediate type was expanded, build the intermediate
    247       // operands from the parts.
    248       assert(NumParts % NumIntermediates == 0 &&
    249              "Must expand into a divisible number of parts!");
    250       unsigned Factor = NumParts / NumIntermediates;
    251       for (unsigned i = 0; i != NumIntermediates; ++i)
    252         Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
    253                                   PartVT, IntermediateVT);
    254     }
    255 
    256     // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    257     // intermediate operands.
    258     Val = DAG.getNode(IntermediateVT.isVector() ?
    259                       ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
    260                       ValueVT, &Ops[0], NumIntermediates);
    261   }
    262 
    263   // There is now one part, held in Val.  Correct it to match ValueVT.
    264   PartVT = Val.getValueType();
    265 
    266   if (PartVT == ValueVT)
    267     return Val;
    268 
    269   if (PartVT.isVector()) {
    270     // If the element type of the source/dest vectors are the same, but the
    271     // parts vector has more elements than the value vector, then we have a
    272     // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
    273     // elements we want.
    274     if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
    275       assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
    276              "Cannot narrow, it would be a lossy transformation");
    277       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
    278                          DAG.getIntPtrConstant(0));
    279     }
    280 
    281     // Vector/Vector bitcast.
    282     if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
    283       return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    284 
    285     assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
    286       "Cannot handle this kind of promotion");
    287     // Promoted vector extract
    288     bool Smaller = ValueVT.bitsLE(PartVT);
    289     return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
    290                        DL, ValueVT, Val);
    291 
    292   }
    293 
    294   // Trivial bitcast if the types are the same size and the destination
    295   // vector type is legal.
    296   if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
    297       TLI.isTypeLegal(ValueVT))
    298     return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    299 
    300   // Handle cases such as i8 -> <1 x i1>
    301   assert(ValueVT.getVectorNumElements() == 1 &&
    302          "Only trivial scalar-to-vector conversions should get here!");
    303 
    304   if (ValueVT.getVectorNumElements() == 1 &&
    305       ValueVT.getVectorElementType() != PartVT) {
    306     bool Smaller = ValueVT.bitsLE(PartVT);
    307     Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
    308                        DL, ValueVT.getScalarType(), Val);
    309   }
    310 
    311   return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
    312 }
    313 
    314 
    315 
    316 
    317 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
    318                                  SDValue Val, SDValue *Parts, unsigned NumParts,
    319                                  EVT PartVT);
    320 
    321 /// getCopyToParts - Create a series of nodes that contain the specified value
    322 /// split into legal parts.  If the parts contain more bits than Val, then, for
    323 /// integers, ExtendKind can be used to specify how to generate the extra bits.
    324 static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
    325                            SDValue Val, SDValue *Parts, unsigned NumParts,
    326                            EVT PartVT,
    327                            ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
    328   EVT ValueVT = Val.getValueType();
    329 
    330   // Handle the vector case separately.
    331   if (ValueVT.isVector())
    332     return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
    333 
    334   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    335   unsigned PartBits = PartVT.getSizeInBits();
    336   unsigned OrigNumParts = NumParts;
    337   assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
    338 
    339   if (NumParts == 0)
    340     return;
    341 
    342   assert(!ValueVT.isVector() && "Vector case handled elsewhere");
    343   if (PartVT == ValueVT) {
    344     assert(NumParts == 1 && "No-op copy with multiple parts!");
    345     Parts[0] = Val;
    346     return;
    347   }
    348 
    349   if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    350     // If the parts cover more bits than the value has, promote the value.
    351     if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    352       assert(NumParts == 1 && "Do not know what to promote to!");
    353       Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    354     } else {
    355       assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
    356              ValueVT.isInteger() &&
    357              "Unknown mismatch!");
    358       ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    359       Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
    360       if (PartVT == MVT::x86mmx)
    361         Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    362     }
    363   } else if (PartBits == ValueVT.getSizeInBits()) {
    364     // Different types of the same size.
    365     assert(NumParts == 1 && PartVT != ValueVT);
    366     Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    367   } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    368     // If the parts cover less bits than value has, truncate the value.
    369     assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
    370            ValueVT.isInteger() &&
    371            "Unknown mismatch!");
    372     ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    373     Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    374     if (PartVT == MVT::x86mmx)
    375       Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    376   }
    377 
    378   // The value may have changed - recompute ValueVT.
    379   ValueVT = Val.getValueType();
    380   assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
    381          "Failed to tile the value with PartVT!");
    382 
    383   if (NumParts == 1) {
    384     assert(PartVT == ValueVT && "Type conversion failed!");
    385     Parts[0] = Val;
    386     return;
    387   }
    388 
    389   // Expand the value into multiple parts.
    390   if (NumParts & (NumParts - 1)) {
    391     // The number of parts is not a power of 2.  Split off and copy the tail.
    392     assert(PartVT.isInteger() && ValueVT.isInteger() &&
    393            "Do not know what to expand to!");
    394     unsigned RoundParts = 1 << Log2_32(NumParts);
    395     unsigned RoundBits = RoundParts * PartBits;
    396     unsigned OddParts = NumParts - RoundParts;
    397     SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
    398                                  DAG.getIntPtrConstant(RoundBits));
    399     getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
    400 
    401     if (TLI.isBigEndian())
    402       // The odd parts were reversed by getCopyToParts - unreverse them.
    403       std::reverse(Parts + RoundParts, Parts + NumParts);
    404 
    405     NumParts = RoundParts;
    406     ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    407     Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    408   }
    409 
    410   // The number of parts is a power of 2.  Repeatedly bisect the value using
    411   // EXTRACT_ELEMENT.
    412   Parts[0] = DAG.getNode(ISD::BITCAST, DL,
    413                          EVT::getIntegerVT(*DAG.getContext(),
    414                                            ValueVT.getSizeInBits()),
    415                          Val);
    416 
    417   for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    418     for (unsigned i = 0; i < NumParts; i += StepSize) {
    419       unsigned ThisBits = StepSize * PartBits / 2;
    420       EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
    421       SDValue &Part0 = Parts[i];
    422       SDValue &Part1 = Parts[i+StepSize/2];
    423 
    424       Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
    425                           ThisVT, Part0, DAG.getIntPtrConstant(1));
    426       Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
    427                           ThisVT, Part0, DAG.getIntPtrConstant(0));
    428 
    429       if (ThisBits == PartBits && ThisVT != PartVT) {
    430         Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
    431         Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
    432       }
    433     }
    434   }
    435 
    436   if (TLI.isBigEndian())
    437     std::reverse(Parts, Parts + OrigNumParts);
    438 }
    439 
    440 
    441 /// getCopyToPartsVector - Create a series of nodes that contain the specified
    442 /// value split into legal parts.
    443 static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
    444                                  SDValue Val, SDValue *Parts, unsigned NumParts,
    445                                  EVT PartVT) {
    446   EVT ValueVT = Val.getValueType();
    447   assert(ValueVT.isVector() && "Not a vector");
    448   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    449 
    450   if (NumParts == 1) {
    451     if (PartVT == ValueVT) {
    452       // Nothing to do.
    453     } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
    454       // Bitconvert vector->vector case.
    455       Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    456     } else if (PartVT.isVector() &&
    457                PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
    458                PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
    459       EVT ElementVT = PartVT.getVectorElementType();
    460       // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
    461       // undef elements.
    462       SmallVector<SDValue, 16> Ops;
    463       for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
    464         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
    465                                   ElementVT, Val, DAG.getIntPtrConstant(i)));
    466 
    467       for (unsigned i = ValueVT.getVectorNumElements(),
    468            e = PartVT.getVectorNumElements(); i != e; ++i)
    469         Ops.push_back(DAG.getUNDEF(ElementVT));
    470 
    471       Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
    472 
    473       // FIXME: Use CONCAT for 2x -> 4x.
    474 
    475       //SDValue UndefElts = DAG.getUNDEF(VectorTy);
    476       //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    477     } else if (PartVT.isVector() &&
    478                PartVT.getVectorElementType().bitsGE(
    479                  ValueVT.getVectorElementType()) &&
    480                PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
    481 
    482       // Promoted vector extract
    483       bool Smaller = PartVT.bitsLE(ValueVT);
    484       Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
    485                         DL, PartVT, Val);
    486     } else{
    487       // Vector -> scalar conversion.
    488       assert(ValueVT.getVectorNumElements() == 1 &&
    489              "Only trivial vector-to-scalar conversions should get here!");
    490       Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
    491                         PartVT, Val, DAG.getIntPtrConstant(0));
    492 
    493       bool Smaller = ValueVT.bitsLE(PartVT);
    494       Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
    495                          DL, PartVT, Val);
    496     }
    497 
    498     Parts[0] = Val;
    499     return;
    500   }
    501 
    502   // Handle a multi-element vector.
    503   EVT IntermediateVT, RegisterVT;
    504   unsigned NumIntermediates;
    505   unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
    506                                                 IntermediateVT,
    507                                                 NumIntermediates, RegisterVT);
    508   unsigned NumElements = ValueVT.getVectorNumElements();
    509 
    510   assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    511   NumParts = NumRegs; // Silence a compiler warning.
    512   assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    513 
    514   // Split the vector into intermediate operands.
    515   SmallVector<SDValue, 8> Ops(NumIntermediates);
    516   for (unsigned i = 0; i != NumIntermediates; ++i) {
    517     if (IntermediateVT.isVector())
    518       Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
    519                            IntermediateVT, Val,
    520                    DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
    521     else
    522       Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
    523                            IntermediateVT, Val, DAG.getIntPtrConstant(i));
    524   }
    525 
    526   // Split the intermediate operands into legal parts.
    527   if (NumParts == NumIntermediates) {
    528     // If the register was not expanded, promote or copy the value,
    529     // as appropriate.
    530     for (unsigned i = 0; i != NumParts; ++i)
    531       getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
    532   } else if (NumParts > 0) {
    533     // If the intermediate type was expanded, split each the value into
    534     // legal parts.
    535     assert(NumParts % NumIntermediates == 0 &&
    536            "Must expand into a divisible number of parts!");
    537     unsigned Factor = NumParts / NumIntermediates;
    538     for (unsigned i = 0; i != NumIntermediates; ++i)
    539       getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
    540   }
    541 }
    542 
    543 
    544 
    545 
    546 namespace {
    547   /// RegsForValue - This struct represents the registers (physical or virtual)
    548   /// that a particular set of values is assigned, and the type information
    549   /// about the value. The most common situation is to represent one value at a
    550   /// time, but struct or array values are handled element-wise as multiple
    551   /// values.  The splitting of aggregates is performed recursively, so that we
    552   /// never have aggregate-typed registers. The values at this point do not
    553   /// necessarily have legal types, so each value may require one or more
    554   /// registers of some legal type.
    555   ///
    556   struct RegsForValue {
    557     /// ValueVTs - The value types of the values, which may not be legal, and
    558     /// may need be promoted or synthesized from one or more registers.
    559     ///
    560     SmallVector<EVT, 4> ValueVTs;
    561 
    562     /// RegVTs - The value types of the registers. This is the same size as
    563     /// ValueVTs and it records, for each value, what the type of the assigned
    564     /// register or registers are. (Individual values are never synthesized
    565     /// from more than one type of register.)
    566     ///
    567     /// With virtual registers, the contents of RegVTs is redundant with TLI's
    568     /// getRegisterType member function, however when with physical registers
    569     /// it is necessary to have a separate record of the types.
    570     ///
    571     SmallVector<EVT, 4> RegVTs;
    572 
    573     /// Regs - This list holds the registers assigned to the values.
    574     /// Each legal or promoted value requires one register, and each
    575     /// expanded value requires multiple registers.
    576     ///
    577     SmallVector<unsigned, 4> Regs;
    578 
    579     RegsForValue() {}
    580 
    581     RegsForValue(const SmallVector<unsigned, 4> &regs,
    582                  EVT regvt, EVT valuevt)
    583       : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
    584 
    585     RegsForValue(LLVMContext &Context, const TargetLowering &tli,
    586                  unsigned Reg, Type *Ty) {
    587       ComputeValueVTs(tli, Ty, ValueVTs);
    588 
    589       for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
    590         EVT ValueVT = ValueVTs[Value];
    591         unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
    592         EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
    593         for (unsigned i = 0; i != NumRegs; ++i)
    594           Regs.push_back(Reg + i);
    595         RegVTs.push_back(RegisterVT);
    596         Reg += NumRegs;
    597       }
    598     }
    599 
    600     /// areValueTypesLegal - Return true if types of all the values are legal.
    601     bool areValueTypesLegal(const TargetLowering &TLI) {
    602       for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
    603         EVT RegisterVT = RegVTs[Value];
    604         if (!TLI.isTypeLegal(RegisterVT))
    605           return false;
    606       }
    607       return true;
    608     }
    609 
    610     /// append - Add the specified values to this one.
    611     void append(const RegsForValue &RHS) {
    612       ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
    613       RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
    614       Regs.append(RHS.Regs.begin(), RHS.Regs.end());
    615     }
    616 
    617     /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
    618     /// this value and returns the result as a ValueVTs value.  This uses
    619     /// Chain/Flag as the input and updates them for the output Chain/Flag.
    620     /// If the Flag pointer is NULL, no flag is used.
    621     SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
    622                             DebugLoc dl,
    623                             SDValue &Chain, SDValue *Flag) const;
    624 
    625     /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
    626     /// specified value into the registers specified by this object.  This uses
    627     /// Chain/Flag as the input and updates them for the output Chain/Flag.
    628     /// If the Flag pointer is NULL, no flag is used.
    629     void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
    630                        SDValue &Chain, SDValue *Flag) const;
    631 
    632     /// AddInlineAsmOperands - Add this value to the specified inlineasm node
    633     /// operand list.  This adds the code marker, matching input operand index
    634     /// (if applicable), and includes the number of values added into it.
    635     void AddInlineAsmOperands(unsigned Kind,
    636                               bool HasMatching, unsigned MatchingIdx,
    637                               SelectionDAG &DAG,
    638                               std::vector<SDValue> &Ops) const;
    639   };
    640 }
    641 
    642 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
    643 /// this value and returns the result as a ValueVT value.  This uses
    644 /// Chain/Flag as the input and updates them for the output Chain/Flag.
    645 /// If the Flag pointer is NULL, no flag is used.
    646 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
    647                                       FunctionLoweringInfo &FuncInfo,
    648                                       DebugLoc dl,
    649                                       SDValue &Chain, SDValue *Flag) const {
    650   // A Value with type {} or [0 x %t] needs no registers.
    651   if (ValueVTs.empty())
    652     return SDValue();
    653 
    654   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    655 
    656   // Assemble the legal parts into the final values.
    657   SmallVector<SDValue, 4> Values(ValueVTs.size());
    658   SmallVector<SDValue, 8> Parts;
    659   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    660     // Copy the legal parts from the registers.
    661     EVT ValueVT = ValueVTs[Value];
    662     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
    663     EVT RegisterVT = RegVTs[Value];
    664 
    665     Parts.resize(NumRegs);
    666     for (unsigned i = 0; i != NumRegs; ++i) {
    667       SDValue P;
    668       if (Flag == 0) {
    669         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
    670       } else {
    671         P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
    672         *Flag = P.getValue(2);
    673       }
    674 
    675       Chain = P.getValue(1);
    676       Parts[i] = P;
    677 
    678       // If the source register was virtual and if we know something about it,
    679       // add an assert node.
    680       if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
    681           !RegisterVT.isInteger() || RegisterVT.isVector())
    682         continue;
    683 
    684       const FunctionLoweringInfo::LiveOutInfo *LOI =
    685         FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
    686       if (!LOI)
    687         continue;
    688 
    689       unsigned RegSize = RegisterVT.getSizeInBits();
    690       unsigned NumSignBits = LOI->NumSignBits;
    691       unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
    692 
    693       // FIXME: We capture more information than the dag can represent.  For
    694       // now, just use the tightest assertzext/assertsext possible.
    695       bool isSExt = true;
    696       EVT FromVT(MVT::Other);
    697       if (NumSignBits == RegSize)
    698         isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
    699       else if (NumZeroBits >= RegSize-1)
    700         isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
    701       else if (NumSignBits > RegSize-8)
    702         isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
    703       else if (NumZeroBits >= RegSize-8)
    704         isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
    705       else if (NumSignBits > RegSize-16)
    706         isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
    707       else if (NumZeroBits >= RegSize-16)
    708         isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
    709       else if (NumSignBits > RegSize-32)
    710         isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
    711       else if (NumZeroBits >= RegSize-32)
    712         isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
    713       else
    714         continue;
    715 
    716       // Add an assertion node.
    717       assert(FromVT != MVT::Other);
    718       Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
    719                              RegisterVT, P, DAG.getValueType(FromVT));
    720     }
    721 
    722     Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
    723                                      NumRegs, RegisterVT, ValueVT);
    724     Part += NumRegs;
    725     Parts.clear();
    726   }
    727 
    728   return DAG.getNode(ISD::MERGE_VALUES, dl,
    729                      DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
    730                      &Values[0], ValueVTs.size());
    731 }
    732 
    733 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
    734 /// specified value into the registers specified by this object.  This uses
    735 /// Chain/Flag as the input and updates them for the output Chain/Flag.
    736 /// If the Flag pointer is NULL, no flag is used.
    737 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
    738                                  SDValue &Chain, SDValue *Flag) const {
    739   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    740 
    741   // Get the list of the values's legal parts.
    742   unsigned NumRegs = Regs.size();
    743   SmallVector<SDValue, 8> Parts(NumRegs);
    744   for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    745     EVT ValueVT = ValueVTs[Value];
    746     unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
    747     EVT RegisterVT = RegVTs[Value];
    748 
    749     getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
    750                    &Parts[Part], NumParts, RegisterVT);
    751     Part += NumParts;
    752   }
    753 
    754   // Copy the parts into the registers.
    755   SmallVector<SDValue, 8> Chains(NumRegs);
    756   for (unsigned i = 0; i != NumRegs; ++i) {
    757     SDValue Part;
    758     if (Flag == 0) {
    759       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    760     } else {
    761       Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
    762       *Flag = Part.getValue(1);
    763     }
    764 
    765     Chains[i] = Part.getValue(0);
    766   }
    767 
    768   if (NumRegs == 1 || Flag)
    769     // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    770     // flagged to it. That is the CopyToReg nodes and the user are considered
    771     // a single scheduling unit. If we create a TokenFactor and return it as
    772     // chain, then the TokenFactor is both a predecessor (operand) of the
    773     // user as well as a successor (the TF operands are flagged to the user).
    774     // c1, f1 = CopyToReg
    775     // c2, f2 = CopyToReg
    776     // c3     = TokenFactor c1, c2
    777     // ...
    778     //        = op c3, ..., f2
    779     Chain = Chains[NumRegs-1];
    780   else
    781     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
    782 }
    783 
    784 /// AddInlineAsmOperands - Add this value to the specified inlineasm node
    785 /// operand list.  This adds the code marker and includes the number of
    786 /// values added into it.
    787 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
    788                                         unsigned MatchingIdx,
    789                                         SelectionDAG &DAG,
    790                                         std::vector<SDValue> &Ops) const {
    791   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    792 
    793   unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
    794   if (HasMatching)
    795     Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
    796   else if (!Regs.empty() &&
    797            TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    798     // Put the register class of the virtual registers in the flag word.  That
    799     // way, later passes can recompute register class constraints for inline
    800     // assembly as well as normal instructions.
    801     // Don't do this for tied operands that can use the regclass information
    802     // from the def.
    803     const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    804     const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    805     Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
    806   }
    807 
    808   SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
    809   Ops.push_back(Res);
    810 
    811   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    812     unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    813     EVT RegisterVT = RegVTs[Value];
    814     for (unsigned i = 0; i != NumRegs; ++i) {
    815       assert(Reg < Regs.size() && "Mismatch in # registers expected");
    816       Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
    817     }
    818   }
    819 }
    820 
    821 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
    822                                const TargetLibraryInfo *li) {
    823   AA = &aa;
    824   GFI = gfi;
    825   LibInfo = li;
    826   TD = DAG.getTarget().getTargetData();
    827   LPadToCallSiteMap.clear();
    828 }
    829 
    830 /// clear - Clear out the current SelectionDAG and the associated
    831 /// state and prepare this SelectionDAGBuilder object to be used
    832 /// for a new block. This doesn't clear out information about
    833 /// additional blocks that are needed to complete switch lowering
    834 /// or PHI node updating; that information is cleared out as it is
    835 /// consumed.
    836 void SelectionDAGBuilder::clear() {
    837   NodeMap.clear();
    838   UnusedArgNodeMap.clear();
    839   PendingLoads.clear();
    840   PendingExports.clear();
    841   CurDebugLoc = DebugLoc();
    842   HasTailCall = false;
    843 }
    844 
    845 /// clearDanglingDebugInfo - Clear the dangling debug information
    846 /// map. This function is seperated from the clear so that debug
    847 /// information that is dangling in a basic block can be properly
    848 /// resolved in a different basic block. This allows the
    849 /// SelectionDAG to resolve dangling debug information attached
    850 /// to PHI nodes.
    851 void SelectionDAGBuilder::clearDanglingDebugInfo() {
    852   DanglingDebugInfoMap.clear();
    853 }
    854 
    855 /// getRoot - Return the current virtual root of the Selection DAG,
    856 /// flushing any PendingLoad items. This must be done before emitting
    857 /// a store or any other node that may need to be ordered after any
    858 /// prior load instructions.
    859 ///
    860 SDValue SelectionDAGBuilder::getRoot() {
    861   if (PendingLoads.empty())
    862     return DAG.getRoot();
    863 
    864   if (PendingLoads.size() == 1) {
    865     SDValue Root = PendingLoads[0];
    866     DAG.setRoot(Root);
    867     PendingLoads.clear();
    868     return Root;
    869   }
    870 
    871   // Otherwise, we have to make a token factor node.
    872   SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
    873                                &PendingLoads[0], PendingLoads.size());
    874   PendingLoads.clear();
    875   DAG.setRoot(Root);
    876   return Root;
    877 }
    878 
    879 /// getControlRoot - Similar to getRoot, but instead of flushing all the
    880 /// PendingLoad items, flush all the PendingExports items. It is necessary
    881 /// to do this before emitting a terminator instruction.
    882 ///
    883 SDValue SelectionDAGBuilder::getControlRoot() {
    884   SDValue Root = DAG.getRoot();
    885 
    886   if (PendingExports.empty())
    887     return Root;
    888 
    889   // Turn all of the CopyToReg chains into one factored node.
    890   if (Root.getOpcode() != ISD::EntryToken) {
    891     unsigned i = 0, e = PendingExports.size();
    892     for (; i != e; ++i) {
    893       assert(PendingExports[i].getNode()->getNumOperands() > 1);
    894       if (PendingExports[i].getNode()->getOperand(0) == Root)
    895         break;  // Don't add the root if we already indirectly depend on it.
    896     }
    897 
    898     if (i == e)
    899       PendingExports.push_back(Root);
    900   }
    901 
    902   Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
    903                      &PendingExports[0],
    904                      PendingExports.size());
    905   PendingExports.clear();
    906   DAG.setRoot(Root);
    907   return Root;
    908 }
    909 
    910 void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
    911   if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
    912   DAG.AssignOrdering(Node, SDNodeOrder);
    913 
    914   for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
    915     AssignOrderingToNode(Node->getOperand(I).getNode());
    916 }
    917 
    918 void SelectionDAGBuilder::visit(const Instruction &I) {
    919   // Set up outgoing PHI node register values before emitting the terminator.
    920   if (isa<TerminatorInst>(&I))
    921     HandlePHINodesInSuccessorBlocks(I.getParent());
    922 
    923   CurDebugLoc = I.getDebugLoc();
    924 
    925   visit(I.getOpcode(), I);
    926 
    927   if (!isa<TerminatorInst>(&I) && !HasTailCall)
    928     CopyToExportRegsIfNeeded(&I);
    929 
    930   CurDebugLoc = DebugLoc();
    931 }
    932 
    933 void SelectionDAGBuilder::visitPHI(const PHINode &) {
    934   llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
    935 }
    936 
    937 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
    938   // Note: this doesn't use InstVisitor, because it has to work with
    939   // ConstantExpr's in addition to instructions.
    940   switch (Opcode) {
    941   default: llvm_unreachable("Unknown instruction type encountered!");
    942     // Build the switch statement using the Instruction.def file.
    943 #define HANDLE_INST(NUM, OPCODE, CLASS) \
    944     case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
    945 #include "llvm/Instruction.def"
    946   }
    947 
    948   // Assign the ordering to the freshly created DAG nodes.
    949   if (NodeMap.count(&I)) {
    950     ++SDNodeOrder;
    951     AssignOrderingToNode(getValue(&I).getNode());
    952   }
    953 }
    954 
    955 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
    956 // generate the debug data structures now that we've seen its definition.
    957 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
    958                                                    SDValue Val) {
    959   DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
    960   if (DDI.getDI()) {
    961     const DbgValueInst *DI = DDI.getDI();
    962     DebugLoc dl = DDI.getdl();
    963     unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    964     MDNode *Variable = DI->getVariable();
    965     uint64_t Offset = DI->getOffset();
    966     SDDbgValue *SDV;
    967     if (Val.getNode()) {
    968       if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
    969         SDV = DAG.getDbgValue(Variable, Val.getNode(),
    970                               Val.getResNo(), Offset, dl, DbgSDNodeOrder);
    971         DAG.AddDbgValue(SDV, Val.getNode(), false);
    972       }
    973     } else
    974       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
    975     DanglingDebugInfoMap[V] = DanglingDebugInfo();
    976   }
    977 }
    978 
    979 /// getValue - Return an SDValue for the given Value.
    980 SDValue SelectionDAGBuilder::getValue(const Value *V) {
    981   // If we already have an SDValue for this value, use it. It's important
    982   // to do this first, so that we don't create a CopyFromReg if we already
    983   // have a regular SDValue.
    984   SDValue &N = NodeMap[V];
    985   if (N.getNode()) return N;
    986 
    987   // If there's a virtual register allocated and initialized for this
    988   // value, use it.
    989   DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
    990   if (It != FuncInfo.ValueMap.end()) {
    991     unsigned InReg = It->second;
    992     RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
    993     SDValue Chain = DAG.getEntryNode();
    994     N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
    995     resolveDanglingDebugInfo(V, N);
    996     return N;
    997   }
    998 
    999   // Otherwise create a new SDValue and remember it.
   1000   SDValue Val = getValueImpl(V);
   1001   NodeMap[V] = Val;
   1002   resolveDanglingDebugInfo(V, Val);
   1003   return Val;
   1004 }
   1005 
   1006 /// getNonRegisterValue - Return an SDValue for the given Value, but
   1007 /// don't look in FuncInfo.ValueMap for a virtual register.
   1008 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
   1009   // If we already have an SDValue for this value, use it.
   1010   SDValue &N = NodeMap[V];
   1011   if (N.getNode()) return N;
   1012 
   1013   // Otherwise create a new SDValue and remember it.
   1014   SDValue Val = getValueImpl(V);
   1015   NodeMap[V] = Val;
   1016   resolveDanglingDebugInfo(V, Val);
   1017   return Val;
   1018 }
   1019 
   1020 /// getValueImpl - Helper function for getValue and getNonRegisterValue.
   1021 /// Create an SDValue for the given value.
   1022 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
   1023   if (const Constant *C = dyn_cast<Constant>(V)) {
   1024     EVT VT = TLI.getValueType(V->getType(), true);
   1025 
   1026     if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
   1027       return DAG.getConstant(*CI, VT);
   1028 
   1029     if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
   1030       return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
   1031 
   1032     if (isa<ConstantPointerNull>(C))
   1033       return DAG.getConstant(0, TLI.getPointerTy());
   1034 
   1035     if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
   1036       return DAG.getConstantFP(*CFP, VT);
   1037 
   1038     if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
   1039       return DAG.getUNDEF(VT);
   1040 
   1041     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
   1042       visit(CE->getOpcode(), *CE);
   1043       SDValue N1 = NodeMap[V];
   1044       assert(N1.getNode() && "visit didn't populate the NodeMap!");
   1045       return N1;
   1046     }
   1047 
   1048     if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
   1049       SmallVector<SDValue, 4> Constants;
   1050       for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
   1051            OI != OE; ++OI) {
   1052         SDNode *Val = getValue(*OI).getNode();
   1053         // If the operand is an empty aggregate, there are no values.
   1054         if (!Val) continue;
   1055         // Add each leaf value from the operand to the Constants list
   1056         // to form a flattened list of all the values.
   1057         for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
   1058           Constants.push_back(SDValue(Val, i));
   1059       }
   1060 
   1061       return DAG.getMergeValues(&Constants[0], Constants.size(),
   1062                                 getCurDebugLoc());
   1063     }
   1064 
   1065     if (const ConstantDataSequential *CDS =
   1066           dyn_cast<ConstantDataSequential>(C)) {
   1067       SmallVector<SDValue, 4> Ops;
   1068       for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   1069         SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
   1070         // Add each leaf value from the operand to the Constants list
   1071         // to form a flattened list of all the values.
   1072         for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
   1073           Ops.push_back(SDValue(Val, i));
   1074       }
   1075 
   1076       if (isa<ArrayType>(CDS->getType()))
   1077         return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
   1078       return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
   1079                                       VT, &Ops[0], Ops.size());
   1080     }
   1081 
   1082     if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
   1083       assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
   1084              "Unknown struct or array constant!");
   1085 
   1086       SmallVector<EVT, 4> ValueVTs;
   1087       ComputeValueVTs(TLI, C->getType(), ValueVTs);
   1088       unsigned NumElts = ValueVTs.size();
   1089       if (NumElts == 0)
   1090         return SDValue(); // empty struct
   1091       SmallVector<SDValue, 4> Constants(NumElts);
   1092       for (unsigned i = 0; i != NumElts; ++i) {
   1093         EVT EltVT = ValueVTs[i];
   1094         if (isa<UndefValue>(C))
   1095           Constants[i] = DAG.getUNDEF(EltVT);
   1096         else if (EltVT.isFloatingPoint())
   1097           Constants[i] = DAG.getConstantFP(0, EltVT);
   1098         else
   1099           Constants[i] = DAG.getConstant(0, EltVT);
   1100       }
   1101 
   1102       return DAG.getMergeValues(&Constants[0], NumElts,
   1103                                 getCurDebugLoc());
   1104     }
   1105 
   1106     if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
   1107       return DAG.getBlockAddress(BA, VT);
   1108 
   1109     VectorType *VecTy = cast<VectorType>(V->getType());
   1110     unsigned NumElements = VecTy->getNumElements();
   1111 
   1112     // Now that we know the number and type of the elements, get that number of
   1113     // elements into the Ops array based on what kind of constant it is.
   1114     SmallVector<SDValue, 16> Ops;
   1115     if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
   1116       for (unsigned i = 0; i != NumElements; ++i)
   1117         Ops.push_back(getValue(CV->getOperand(i)));
   1118     } else {
   1119       assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
   1120       EVT EltVT = TLI.getValueType(VecTy->getElementType());
   1121 
   1122       SDValue Op;
   1123       if (EltVT.isFloatingPoint())
   1124         Op = DAG.getConstantFP(0, EltVT);
   1125       else
   1126         Op = DAG.getConstant(0, EltVT);
   1127       Ops.assign(NumElements, Op);
   1128     }
   1129 
   1130     // Create a BUILD_VECTOR node.
   1131     return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
   1132                                     VT, &Ops[0], Ops.size());
   1133   }
   1134 
   1135   // If this is a static alloca, generate it as the frameindex instead of
   1136   // computation.
   1137   if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
   1138     DenseMap<const AllocaInst*, int>::iterator SI =
   1139       FuncInfo.StaticAllocaMap.find(AI);
   1140     if (SI != FuncInfo.StaticAllocaMap.end())
   1141       return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
   1142   }
   1143 
   1144   // If this is an instruction which fast-isel has deferred, select it now.
   1145   if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
   1146     unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
   1147     RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
   1148     SDValue Chain = DAG.getEntryNode();
   1149     return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
   1150   }
   1151 
   1152   llvm_unreachable("Can't get register for value!");
   1153 }
   1154 
   1155 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
   1156   SDValue Chain = getControlRoot();
   1157   SmallVector<ISD::OutputArg, 8> Outs;
   1158   SmallVector<SDValue, 8> OutVals;
   1159 
   1160   if (!FuncInfo.CanLowerReturn) {
   1161     unsigned DemoteReg = FuncInfo.DemoteRegister;
   1162     const Function *F = I.getParent()->getParent();
   1163 
   1164     // Emit a store of the return value through the virtual register.
   1165     // Leave Outs empty so that LowerReturn won't try to load return
   1166     // registers the usual way.
   1167     SmallVector<EVT, 1> PtrValueVTs;
   1168     ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
   1169                     PtrValueVTs);
   1170 
   1171     SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
   1172     SDValue RetOp = getValue(I.getOperand(0));
   1173 
   1174     SmallVector<EVT, 4> ValueVTs;
   1175     SmallVector<uint64_t, 4> Offsets;
   1176     ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
   1177     unsigned NumValues = ValueVTs.size();
   1178 
   1179     SmallVector<SDValue, 4> Chains(NumValues);
   1180     for (unsigned i = 0; i != NumValues; ++i) {
   1181       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
   1182                                 RetPtr.getValueType(), RetPtr,
   1183                                 DAG.getIntPtrConstant(Offsets[i]));
   1184       Chains[i] =
   1185         DAG.getStore(Chain, getCurDebugLoc(),
   1186                      SDValue(RetOp.getNode(), RetOp.getResNo() + i),
   1187                      // FIXME: better loc info would be nice.
   1188                      Add, MachinePointerInfo(), false, false, 0);
   1189     }
   1190 
   1191     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   1192                         MVT::Other, &Chains[0], NumValues);
   1193   } else if (I.getNumOperands() != 0) {
   1194     SmallVector<EVT, 4> ValueVTs;
   1195     ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
   1196     unsigned NumValues = ValueVTs.size();
   1197     if (NumValues) {
   1198       SDValue RetOp = getValue(I.getOperand(0));
   1199       for (unsigned j = 0, f = NumValues; j != f; ++j) {
   1200         EVT VT = ValueVTs[j];
   1201 
   1202         ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
   1203 
   1204         const Function *F = I.getParent()->getParent();
   1205         if (F->paramHasAttr(0, Attribute::SExt))
   1206           ExtendKind = ISD::SIGN_EXTEND;
   1207         else if (F->paramHasAttr(0, Attribute::ZExt))
   1208           ExtendKind = ISD::ZERO_EXTEND;
   1209 
   1210         if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
   1211           VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
   1212 
   1213         unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
   1214         EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
   1215         SmallVector<SDValue, 4> Parts(NumParts);
   1216         getCopyToParts(DAG, getCurDebugLoc(),
   1217                        SDValue(RetOp.getNode(), RetOp.getResNo() + j),
   1218                        &Parts[0], NumParts, PartVT, ExtendKind);
   1219 
   1220         // 'inreg' on function refers to return value
   1221         ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
   1222         if (F->paramHasAttr(0, Attribute::InReg))
   1223           Flags.setInReg();
   1224 
   1225         // Propagate extension type if any
   1226         if (ExtendKind == ISD::SIGN_EXTEND)
   1227           Flags.setSExt();
   1228         else if (ExtendKind == ISD::ZERO_EXTEND)
   1229           Flags.setZExt();
   1230 
   1231         for (unsigned i = 0; i < NumParts; ++i) {
   1232           Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
   1233                                         /*isfixed=*/true));
   1234           OutVals.push_back(Parts[i]);
   1235         }
   1236       }
   1237     }
   1238   }
   1239 
   1240   bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
   1241   CallingConv::ID CallConv =
   1242     DAG.getMachineFunction().getFunction()->getCallingConv();
   1243   Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
   1244                           Outs, OutVals, getCurDebugLoc(), DAG);
   1245 
   1246   // Verify that the target's LowerReturn behaved as expected.
   1247   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
   1248          "LowerReturn didn't return a valid chain!");
   1249 
   1250   // Update the DAG with the new chain value resulting from return lowering.
   1251   DAG.setRoot(Chain);
   1252 }
   1253 
   1254 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
   1255 /// created for it, emit nodes to copy the value into the virtual
   1256 /// registers.
   1257 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
   1258   // Skip empty types
   1259   if (V->getType()->isEmptyTy())
   1260     return;
   1261 
   1262   DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
   1263   if (VMI != FuncInfo.ValueMap.end()) {
   1264     assert(!V->use_empty() && "Unused value assigned virtual registers!");
   1265     CopyValueToVirtualRegister(V, VMI->second);
   1266   }
   1267 }
   1268 
   1269 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
   1270 /// the current basic block, add it to ValueMap now so that we'll get a
   1271 /// CopyTo/FromReg.
   1272 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
   1273   // No need to export constants.
   1274   if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
   1275 
   1276   // Already exported?
   1277   if (FuncInfo.isExportedInst(V)) return;
   1278 
   1279   unsigned Reg = FuncInfo.InitializeRegForValue(V);
   1280   CopyValueToVirtualRegister(V, Reg);
   1281 }
   1282 
   1283 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
   1284                                                      const BasicBlock *FromBB) {
   1285   // The operands of the setcc have to be in this block.  We don't know
   1286   // how to export them from some other block.
   1287   if (const Instruction *VI = dyn_cast<Instruction>(V)) {
   1288     // Can export from current BB.
   1289     if (VI->getParent() == FromBB)
   1290       return true;
   1291 
   1292     // Is already exported, noop.
   1293     return FuncInfo.isExportedInst(V);
   1294   }
   1295 
   1296   // If this is an argument, we can export it if the BB is the entry block or
   1297   // if it is already exported.
   1298   if (isa<Argument>(V)) {
   1299     if (FromBB == &FromBB->getParent()->getEntryBlock())
   1300       return true;
   1301 
   1302     // Otherwise, can only export this if it is already exported.
   1303     return FuncInfo.isExportedInst(V);
   1304   }
   1305 
   1306   // Otherwise, constants can always be exported.
   1307   return true;
   1308 }
   1309 
   1310 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
   1311 uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
   1312                                             const MachineBasicBlock *Dst) const {
   1313   BranchProbabilityInfo *BPI = FuncInfo.BPI;
   1314   if (!BPI)
   1315     return 0;
   1316   const BasicBlock *SrcBB = Src->getBasicBlock();
   1317   const BasicBlock *DstBB = Dst->getBasicBlock();
   1318   return BPI->getEdgeWeight(SrcBB, DstBB);
   1319 }
   1320 
   1321 void SelectionDAGBuilder::
   1322 addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
   1323                        uint32_t Weight /* = 0 */) {
   1324   if (!Weight)
   1325     Weight = getEdgeWeight(Src, Dst);
   1326   Src->addSuccessor(Dst, Weight);
   1327 }
   1328 
   1329 
   1330 static bool InBlock(const Value *V, const BasicBlock *BB) {
   1331   if (const Instruction *I = dyn_cast<Instruction>(V))
   1332     return I->getParent() == BB;
   1333   return true;
   1334 }
   1335 
   1336 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
   1337 /// This function emits a branch and is used at the leaves of an OR or an
   1338 /// AND operator tree.
   1339 ///
   1340 void
   1341 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
   1342                                                   MachineBasicBlock *TBB,
   1343                                                   MachineBasicBlock *FBB,
   1344                                                   MachineBasicBlock *CurBB,
   1345                                                   MachineBasicBlock *SwitchBB) {
   1346   const BasicBlock *BB = CurBB->getBasicBlock();
   1347 
   1348   // If the leaf of the tree is a comparison, merge the condition into
   1349   // the caseblock.
   1350   if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
   1351     // The operands of the cmp have to be in this block.  We don't know
   1352     // how to export them from some other block.  If this is the first block
   1353     // of the sequence, no exporting is needed.
   1354     if (CurBB == SwitchBB ||
   1355         (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
   1356          isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
   1357       ISD::CondCode Condition;
   1358       if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
   1359         Condition = getICmpCondCode(IC->getPredicate());
   1360       } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
   1361         Condition = getFCmpCondCode(FC->getPredicate());
   1362         if (TM.Options.NoNaNsFPMath)
   1363           Condition = getFCmpCodeWithoutNaN(Condition);
   1364       } else {
   1365         Condition = ISD::SETEQ; // silence warning.
   1366         llvm_unreachable("Unknown compare instruction");
   1367       }
   1368 
   1369       CaseBlock CB(Condition, BOp->getOperand(0),
   1370                    BOp->getOperand(1), NULL, TBB, FBB, CurBB);
   1371       SwitchCases.push_back(CB);
   1372       return;
   1373     }
   1374   }
   1375 
   1376   // Create a CaseBlock record representing this branch.
   1377   CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
   1378                NULL, TBB, FBB, CurBB);
   1379   SwitchCases.push_back(CB);
   1380 }
   1381 
   1382 /// FindMergedConditions - If Cond is an expression like
   1383 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
   1384                                                MachineBasicBlock *TBB,
   1385                                                MachineBasicBlock *FBB,
   1386                                                MachineBasicBlock *CurBB,
   1387                                                MachineBasicBlock *SwitchBB,
   1388                                                unsigned Opc) {
   1389   // If this node is not part of the or/and tree, emit it as a branch.
   1390   const Instruction *BOp = dyn_cast<Instruction>(Cond);
   1391   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
   1392       (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
   1393       BOp->getParent() != CurBB->getBasicBlock() ||
   1394       !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
   1395       !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
   1396     EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
   1397     return;
   1398   }
   1399 
   1400   //  Create TmpBB after CurBB.
   1401   MachineFunction::iterator BBI = CurBB;
   1402   MachineFunction &MF = DAG.getMachineFunction();
   1403   MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
   1404   CurBB->getParent()->insert(++BBI, TmpBB);
   1405 
   1406   if (Opc == Instruction::Or) {
   1407     // Codegen X | Y as:
   1408     //   jmp_if_X TBB
   1409     //   jmp TmpBB
   1410     // TmpBB:
   1411     //   jmp_if_Y TBB
   1412     //   jmp FBB
   1413     //
   1414 
   1415     // Emit the LHS condition.
   1416     FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
   1417 
   1418     // Emit the RHS condition into TmpBB.
   1419     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
   1420   } else {
   1421     assert(Opc == Instruction::And && "Unknown merge op!");
   1422     // Codegen X & Y as:
   1423     //   jmp_if_X TmpBB
   1424     //   jmp FBB
   1425     // TmpBB:
   1426     //   jmp_if_Y TBB
   1427     //   jmp FBB
   1428     //
   1429     //  This requires creation of TmpBB after CurBB.
   1430 
   1431     // Emit the LHS condition.
   1432     FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
   1433 
   1434     // Emit the RHS condition into TmpBB.
   1435     FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
   1436   }
   1437 }
   1438 
   1439 /// If the set of cases should be emitted as a series of branches, return true.
   1440 /// If we should emit this as a bunch of and/or'd together conditions, return
   1441 /// false.
   1442 bool
   1443 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
   1444   if (Cases.size() != 2) return true;
   1445 
   1446   // If this is two comparisons of the same values or'd or and'd together, they
   1447   // will get folded into a single comparison, so don't emit two blocks.
   1448   if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
   1449        Cases[0].CmpRHS == Cases[1].CmpRHS) ||
   1450       (Cases[0].CmpRHS == Cases[1].CmpLHS &&
   1451        Cases[0].CmpLHS == Cases[1].CmpRHS)) {
   1452     return false;
   1453   }
   1454 
   1455   // Handle: (X != null) | (Y != null) --> (X|Y) != 0
   1456   // Handle: (X == null) & (Y == null) --> (X|Y) == 0
   1457   if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
   1458       Cases[0].CC == Cases[1].CC &&
   1459       isa<Constant>(Cases[0].CmpRHS) &&
   1460       cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
   1461     if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
   1462       return false;
   1463     if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
   1464       return false;
   1465   }
   1466 
   1467   return true;
   1468 }
   1469 
   1470 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
   1471   MachineBasicBlock *BrMBB = FuncInfo.MBB;
   1472 
   1473   // Update machine-CFG edges.
   1474   MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
   1475 
   1476   // Figure out which block is immediately after the current one.
   1477   MachineBasicBlock *NextBlock = 0;
   1478   MachineFunction::iterator BBI = BrMBB;
   1479   if (++BBI != FuncInfo.MF->end())
   1480     NextBlock = BBI;
   1481 
   1482   if (I.isUnconditional()) {
   1483     // Update machine-CFG edges.
   1484     BrMBB->addSuccessor(Succ0MBB);
   1485 
   1486     // If this is not a fall-through branch, emit the branch.
   1487     if (Succ0MBB != NextBlock)
   1488       DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
   1489                               MVT::Other, getControlRoot(),
   1490                               DAG.getBasicBlock(Succ0MBB)));
   1491 
   1492     return;
   1493   }
   1494 
   1495   // If this condition is one of the special cases we handle, do special stuff
   1496   // now.
   1497   const Value *CondVal = I.getCondition();
   1498   MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
   1499 
   1500   // If this is a series of conditions that are or'd or and'd together, emit
   1501   // this as a sequence of branches instead of setcc's with and/or operations.
   1502   // As long as jumps are not expensive, this should improve performance.
   1503   // For example, instead of something like:
   1504   //     cmp A, B
   1505   //     C = seteq
   1506   //     cmp D, E
   1507   //     F = setle
   1508   //     or C, F
   1509   //     jnz foo
   1510   // Emit:
   1511   //     cmp A, B
   1512   //     je foo
   1513   //     cmp D, E
   1514   //     jle foo
   1515   //
   1516   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
   1517     if (!TLI.isJumpExpensive() &&
   1518         BOp->hasOneUse() &&
   1519         (BOp->getOpcode() == Instruction::And ||
   1520          BOp->getOpcode() == Instruction::Or)) {
   1521       FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
   1522                            BOp->getOpcode());
   1523       // If the compares in later blocks need to use values not currently
   1524       // exported from this block, export them now.  This block should always
   1525       // be the first entry.
   1526       assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
   1527 
   1528       // Allow some cases to be rejected.
   1529       if (ShouldEmitAsBranches(SwitchCases)) {
   1530         for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
   1531           ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
   1532           ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
   1533         }
   1534 
   1535         // Emit the branch for this block.
   1536         visitSwitchCase(SwitchCases[0], BrMBB);
   1537         SwitchCases.erase(SwitchCases.begin());
   1538         return;
   1539       }
   1540 
   1541       // Okay, we decided not to do this, remove any inserted MBB's and clear
   1542       // SwitchCases.
   1543       for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
   1544         FuncInfo.MF->erase(SwitchCases[i].ThisBB);
   1545 
   1546       SwitchCases.clear();
   1547     }
   1548   }
   1549 
   1550   // Create a CaseBlock record representing this branch.
   1551   CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
   1552                NULL, Succ0MBB, Succ1MBB, BrMBB);
   1553 
   1554   // Use visitSwitchCase to actually insert the fast branch sequence for this
   1555   // cond branch.
   1556   visitSwitchCase(CB, BrMBB);
   1557 }
   1558 
   1559 /// visitSwitchCase - Emits the necessary code to represent a single node in
   1560 /// the binary search tree resulting from lowering a switch instruction.
   1561 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
   1562                                           MachineBasicBlock *SwitchBB) {
   1563   SDValue Cond;
   1564   SDValue CondLHS = getValue(CB.CmpLHS);
   1565   DebugLoc dl = getCurDebugLoc();
   1566 
   1567   // Build the setcc now.
   1568   if (CB.CmpMHS == NULL) {
   1569     // Fold "(X == true)" to X and "(X == false)" to !X to
   1570     // handle common cases produced by branch lowering.
   1571     if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
   1572         CB.CC == ISD::SETEQ)
   1573       Cond = CondLHS;
   1574     else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
   1575              CB.CC == ISD::SETEQ) {
   1576       SDValue True = DAG.getConstant(1, CondLHS.getValueType());
   1577       Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
   1578     } else
   1579       Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
   1580   } else {
   1581     assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
   1582 
   1583     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
   1584     const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
   1585 
   1586     SDValue CmpOp = getValue(CB.CmpMHS);
   1587     EVT VT = CmpOp.getValueType();
   1588 
   1589     if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
   1590       Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
   1591                           ISD::SETLE);
   1592     } else {
   1593       SDValue SUB = DAG.getNode(ISD::SUB, dl,
   1594                                 VT, CmpOp, DAG.getConstant(Low, VT));
   1595       Cond = DAG.getSetCC(dl, MVT::i1, SUB,
   1596                           DAG.getConstant(High-Low, VT), ISD::SETULE);
   1597     }
   1598   }
   1599 
   1600   // Update successor info
   1601   addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
   1602   addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
   1603 
   1604   // Set NextBlock to be the MBB immediately after the current one, if any.
   1605   // This is used to avoid emitting unnecessary branches to the next block.
   1606   MachineBasicBlock *NextBlock = 0;
   1607   MachineFunction::iterator BBI = SwitchBB;
   1608   if (++BBI != FuncInfo.MF->end())
   1609     NextBlock = BBI;
   1610 
   1611   // If the lhs block is the next block, invert the condition so that we can
   1612   // fall through to the lhs instead of the rhs block.
   1613   if (CB.TrueBB == NextBlock) {
   1614     std::swap(CB.TrueBB, CB.FalseBB);
   1615     SDValue True = DAG.getConstant(1, Cond.getValueType());
   1616     Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
   1617   }
   1618 
   1619   SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
   1620                                MVT::Other, getControlRoot(), Cond,
   1621                                DAG.getBasicBlock(CB.TrueBB));
   1622 
   1623   // Insert the false branch. Do this even if it's a fall through branch,
   1624   // this makes it easier to do DAG optimizations which require inverting
   1625   // the branch condition.
   1626   BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
   1627                        DAG.getBasicBlock(CB.FalseBB));
   1628 
   1629   DAG.setRoot(BrCond);
   1630 }
   1631 
   1632 /// visitJumpTable - Emit JumpTable node in the current MBB
   1633 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
   1634   // Emit the code for the jump table
   1635   assert(JT.Reg != -1U && "Should lower JT Header first!");
   1636   EVT PTy = TLI.getPointerTy();
   1637   SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
   1638                                      JT.Reg, PTy);
   1639   SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
   1640   SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
   1641                                     MVT::Other, Index.getValue(1),
   1642                                     Table, Index);
   1643   DAG.setRoot(BrJumpTable);
   1644 }
   1645 
   1646 /// visitJumpTableHeader - This function emits necessary code to produce index
   1647 /// in the JumpTable from switch case.
   1648 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
   1649                                                JumpTableHeader &JTH,
   1650                                                MachineBasicBlock *SwitchBB) {
   1651   // Subtract the lowest switch case value from the value being switched on and
   1652   // conditional branch to default mbb if the result is greater than the
   1653   // difference between smallest and largest cases.
   1654   SDValue SwitchOp = getValue(JTH.SValue);
   1655   EVT VT = SwitchOp.getValueType();
   1656   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
   1657                             DAG.getConstant(JTH.First, VT));
   1658 
   1659   // The SDNode we just created, which holds the value being switched on minus
   1660   // the smallest case value, needs to be copied to a virtual register so it
   1661   // can be used as an index into the jump table in a subsequent basic block.
   1662   // This value may be smaller or larger than the target's pointer type, and
   1663   // therefore require extension or truncating.
   1664   SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
   1665 
   1666   unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
   1667   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
   1668                                     JumpTableReg, SwitchOp);
   1669   JT.Reg = JumpTableReg;
   1670 
   1671   // Emit the range check for the jump table, and branch to the default block
   1672   // for the switch statement if the value being switched on exceeds the largest
   1673   // case in the switch.
   1674   SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
   1675                              TLI.getSetCCResultType(Sub.getValueType()), Sub,
   1676                              DAG.getConstant(JTH.Last-JTH.First,VT),
   1677                              ISD::SETUGT);
   1678 
   1679   // Set NextBlock to be the MBB immediately after the current one, if any.
   1680   // This is used to avoid emitting unnecessary branches to the next block.
   1681   MachineBasicBlock *NextBlock = 0;
   1682   MachineFunction::iterator BBI = SwitchBB;
   1683 
   1684   if (++BBI != FuncInfo.MF->end())
   1685     NextBlock = BBI;
   1686 
   1687   SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
   1688                                MVT::Other, CopyTo, CMP,
   1689                                DAG.getBasicBlock(JT.Default));
   1690 
   1691   if (JT.MBB != NextBlock)
   1692     BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
   1693                          DAG.getBasicBlock(JT.MBB));
   1694 
   1695   DAG.setRoot(BrCond);
   1696 }
   1697 
   1698 /// visitBitTestHeader - This function emits necessary code to produce value
   1699 /// suitable for "bit tests"
   1700 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
   1701                                              MachineBasicBlock *SwitchBB) {
   1702   // Subtract the minimum value
   1703   SDValue SwitchOp = getValue(B.SValue);
   1704   EVT VT = SwitchOp.getValueType();
   1705   SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
   1706                             DAG.getConstant(B.First, VT));
   1707 
   1708   // Check range
   1709   SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
   1710                                   TLI.getSetCCResultType(Sub.getValueType()),
   1711                                   Sub, DAG.getConstant(B.Range, VT),
   1712                                   ISD::SETUGT);
   1713 
   1714   // Determine the type of the test operands.
   1715   bool UsePtrType = false;
   1716   if (!TLI.isTypeLegal(VT))
   1717     UsePtrType = true;
   1718   else {
   1719     for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
   1720       if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
   1721         // Switch table case range are encoded into series of masks.
   1722         // Just use pointer type, it's guaranteed to fit.
   1723         UsePtrType = true;
   1724         break;
   1725       }
   1726   }
   1727   if (UsePtrType) {
   1728     VT = TLI.getPointerTy();
   1729     Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
   1730   }
   1731 
   1732   B.RegVT = VT;
   1733   B.Reg = FuncInfo.CreateReg(VT);
   1734   SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
   1735                                     B.Reg, Sub);
   1736 
   1737   // Set NextBlock to be the MBB immediately after the current one, if any.
   1738   // This is used to avoid emitting unnecessary branches to the next block.
   1739   MachineBasicBlock *NextBlock = 0;
   1740   MachineFunction::iterator BBI = SwitchBB;
   1741   if (++BBI != FuncInfo.MF->end())
   1742     NextBlock = BBI;
   1743 
   1744   MachineBasicBlock* MBB = B.Cases[0].ThisBB;
   1745 
   1746   addSuccessorWithWeight(SwitchBB, B.Default);
   1747   addSuccessorWithWeight(SwitchBB, MBB);
   1748 
   1749   SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
   1750                                 MVT::Other, CopyTo, RangeCmp,
   1751                                 DAG.getBasicBlock(B.Default));
   1752 
   1753   if (MBB != NextBlock)
   1754     BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
   1755                           DAG.getBasicBlock(MBB));
   1756 
   1757   DAG.setRoot(BrRange);
   1758 }
   1759 
   1760 /// visitBitTestCase - this function produces one "bit test"
   1761 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
   1762                                            MachineBasicBlock* NextMBB,
   1763                                            unsigned Reg,
   1764                                            BitTestCase &B,
   1765                                            MachineBasicBlock *SwitchBB) {
   1766   EVT VT = BB.RegVT;
   1767   SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
   1768                                        Reg, VT);
   1769   SDValue Cmp;
   1770   unsigned PopCount = CountPopulation_64(B.Mask);
   1771   if (PopCount == 1) {
   1772     // Testing for a single bit; just compare the shift count with what it
   1773     // would need to be to shift a 1 bit in that position.
   1774     Cmp = DAG.getSetCC(getCurDebugLoc(),
   1775                        TLI.getSetCCResultType(VT),
   1776                        ShiftOp,
   1777                        DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
   1778                        ISD::SETEQ);
   1779   } else if (PopCount == BB.Range) {
   1780     // There is only one zero bit in the range, test for it directly.
   1781     Cmp = DAG.getSetCC(getCurDebugLoc(),
   1782                        TLI.getSetCCResultType(VT),
   1783                        ShiftOp,
   1784                        DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
   1785                        ISD::SETNE);
   1786   } else {
   1787     // Make desired shift
   1788     SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
   1789                                     DAG.getConstant(1, VT), ShiftOp);
   1790 
   1791     // Emit bit tests and jumps
   1792     SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
   1793                                 VT, SwitchVal, DAG.getConstant(B.Mask, VT));
   1794     Cmp = DAG.getSetCC(getCurDebugLoc(),
   1795                        TLI.getSetCCResultType(VT),
   1796                        AndOp, DAG.getConstant(0, VT),
   1797                        ISD::SETNE);
   1798   }
   1799 
   1800   addSuccessorWithWeight(SwitchBB, B.TargetBB);
   1801   addSuccessorWithWeight(SwitchBB, NextMBB);
   1802 
   1803   SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
   1804                               MVT::Other, getControlRoot(),
   1805                               Cmp, DAG.getBasicBlock(B.TargetBB));
   1806 
   1807   // Set NextBlock to be the MBB immediately after the current one, if any.
   1808   // This is used to avoid emitting unnecessary branches to the next block.
   1809   MachineBasicBlock *NextBlock = 0;
   1810   MachineFunction::iterator BBI = SwitchBB;
   1811   if (++BBI != FuncInfo.MF->end())
   1812     NextBlock = BBI;
   1813 
   1814   if (NextMBB != NextBlock)
   1815     BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
   1816                         DAG.getBasicBlock(NextMBB));
   1817 
   1818   DAG.setRoot(BrAnd);
   1819 }
   1820 
   1821 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
   1822   MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
   1823 
   1824   // Retrieve successors.
   1825   MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
   1826   MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
   1827 
   1828   const Value *Callee(I.getCalledValue());
   1829   if (isa<InlineAsm>(Callee))
   1830     visitInlineAsm(&I);
   1831   else
   1832     LowerCallTo(&I, getValue(Callee), false, LandingPad);
   1833 
   1834   // If the value of the invoke is used outside of its defining block, make it
   1835   // available as a virtual register.
   1836   CopyToExportRegsIfNeeded(&I);
   1837 
   1838   // Update successor info
   1839   addSuccessorWithWeight(InvokeMBB, Return);
   1840   addSuccessorWithWeight(InvokeMBB, LandingPad);
   1841 
   1842   // Drop into normal successor.
   1843   DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
   1844                           MVT::Other, getControlRoot(),
   1845                           DAG.getBasicBlock(Return)));
   1846 }
   1847 
   1848 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
   1849   llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
   1850 }
   1851 
   1852 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
   1853   assert(FuncInfo.MBB->isLandingPad() &&
   1854          "Call to landingpad not in landing pad!");
   1855 
   1856   MachineBasicBlock *MBB = FuncInfo.MBB;
   1857   MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
   1858   AddLandingPadInfo(LP, MMI, MBB);
   1859 
   1860   // If there aren't registers to copy the values into (e.g., during SjLj
   1861   // exceptions), then don't bother to create these DAG nodes.
   1862   if (TLI.getExceptionPointerRegister() == 0 &&
   1863       TLI.getExceptionSelectorRegister() == 0)
   1864     return;
   1865 
   1866   SmallVector<EVT, 2> ValueVTs;
   1867   ComputeValueVTs(TLI, LP.getType(), ValueVTs);
   1868 
   1869   // Insert the EXCEPTIONADDR instruction.
   1870   assert(FuncInfo.MBB->isLandingPad() &&
   1871          "Call to eh.exception not in landing pad!");
   1872   SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
   1873   SDValue Ops[2];
   1874   Ops[0] = DAG.getRoot();
   1875   SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
   1876   SDValue Chain = Op1.getValue(1);
   1877 
   1878   // Insert the EHSELECTION instruction.
   1879   VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
   1880   Ops[0] = Op1;
   1881   Ops[1] = Chain;
   1882   SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
   1883   Chain = Op2.getValue(1);
   1884   Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
   1885 
   1886   Ops[0] = Op1;
   1887   Ops[1] = Op2;
   1888   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   1889                             DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
   1890                             &Ops[0], 2);
   1891 
   1892   std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
   1893   setValue(&LP, RetPair.first);
   1894   DAG.setRoot(RetPair.second);
   1895 }
   1896 
   1897 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
   1898 /// small case ranges).
   1899 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
   1900                                                  CaseRecVector& WorkList,
   1901                                                  const Value* SV,
   1902                                                  MachineBasicBlock *Default,
   1903                                                  MachineBasicBlock *SwitchBB) {
   1904   Case& BackCase  = *(CR.Range.second-1);
   1905 
   1906   // Size is the number of Cases represented by this range.
   1907   size_t Size = CR.Range.second - CR.Range.first;
   1908   if (Size > 3)
   1909     return false;
   1910 
   1911   // Get the MachineFunction which holds the current MBB.  This is used when
   1912   // inserting any additional MBBs necessary to represent the switch.
   1913   MachineFunction *CurMF = FuncInfo.MF;
   1914 
   1915   // Figure out which block is immediately after the current one.
   1916   MachineBasicBlock *NextBlock = 0;
   1917   MachineFunction::iterator BBI = CR.CaseBB;
   1918 
   1919   if (++BBI != FuncInfo.MF->end())
   1920     NextBlock = BBI;
   1921 
   1922   // If any two of the cases has the same destination, and if one value
   1923   // is the same as the other, but has one bit unset that the other has set,
   1924   // use bit manipulation to do two compares at once.  For example:
   1925   // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
   1926   // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
   1927   // TODO: Handle cases where CR.CaseBB != SwitchBB.
   1928   if (Size == 2 && CR.CaseBB == SwitchBB) {
   1929     Case &Small = *CR.Range.first;
   1930     Case &Big = *(CR.Range.second-1);
   1931 
   1932     if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
   1933       const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
   1934       const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
   1935 
   1936       // Check that there is only one bit different.
   1937       if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
   1938           (SmallValue | BigValue) == BigValue) {
   1939         // Isolate the common bit.
   1940         APInt CommonBit = BigValue & ~SmallValue;
   1941         assert((SmallValue | CommonBit) == BigValue &&
   1942                CommonBit.countPopulation() == 1 && "Not a common bit?");
   1943 
   1944         SDValue CondLHS = getValue(SV);
   1945         EVT VT = CondLHS.getValueType();
   1946         DebugLoc DL = getCurDebugLoc();
   1947 
   1948         SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
   1949                                  DAG.getConstant(CommonBit, VT));
   1950         SDValue Cond = DAG.getSetCC(DL, MVT::i1,
   1951                                     Or, DAG.getConstant(BigValue, VT),
   1952                                     ISD::SETEQ);
   1953 
   1954         // Update successor info.
   1955         addSuccessorWithWeight(SwitchBB, Small.BB);
   1956         addSuccessorWithWeight(SwitchBB, Default);
   1957 
   1958         // Insert the true branch.
   1959         SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
   1960                                      getControlRoot(), Cond,
   1961                                      DAG.getBasicBlock(Small.BB));
   1962 
   1963         // Insert the false branch.
   1964         BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
   1965                              DAG.getBasicBlock(Default));
   1966 
   1967         DAG.setRoot(BrCond);
   1968         return true;
   1969       }
   1970     }
   1971   }
   1972 
   1973   // Rearrange the case blocks so that the last one falls through if possible.
   1974   if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
   1975     // The last case block won't fall through into 'NextBlock' if we emit the
   1976     // branches in this order.  See if rearranging a case value would help.
   1977     for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
   1978       if (I->BB == NextBlock) {
   1979         std::swap(*I, BackCase);
   1980         break;
   1981       }
   1982     }
   1983   }
   1984 
   1985   // Create a CaseBlock record representing a conditional branch to
   1986   // the Case's target mbb if the value being switched on SV is equal
   1987   // to C.
   1988   MachineBasicBlock *CurBlock = CR.CaseBB;
   1989   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
   1990     MachineBasicBlock *FallThrough;
   1991     if (I != E-1) {
   1992       FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
   1993       CurMF->insert(BBI, FallThrough);
   1994 
   1995       // Put SV in a virtual register to make it available from the new blocks.
   1996       ExportFromCurrentBlock(SV);
   1997     } else {
   1998       // If the last case doesn't match, go to the default block.
   1999       FallThrough = Default;
   2000     }
   2001 
   2002     const Value *RHS, *LHS, *MHS;
   2003     ISD::CondCode CC;
   2004     if (I->High == I->Low) {
   2005       // This is just small small case range :) containing exactly 1 case
   2006       CC = ISD::SETEQ;
   2007       LHS = SV; RHS = I->High; MHS = NULL;
   2008     } else {
   2009       CC = ISD::SETLE;
   2010       LHS = I->Low; MHS = SV; RHS = I->High;
   2011     }
   2012 
   2013     uint32_t ExtraWeight = I->ExtraWeight;
   2014     CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
   2015                  /* me */ CurBlock,
   2016                  /* trueweight */ ExtraWeight / 2, /* falseweight */ ExtraWeight / 2);
   2017 
   2018     // If emitting the first comparison, just call visitSwitchCase to emit the
   2019     // code into the current block.  Otherwise, push the CaseBlock onto the
   2020     // vector to be later processed by SDISel, and insert the node's MBB
   2021     // before the next MBB.
   2022     if (CurBlock == SwitchBB)
   2023       visitSwitchCase(CB, SwitchBB);
   2024     else
   2025       SwitchCases.push_back(CB);
   2026 
   2027     CurBlock = FallThrough;
   2028   }
   2029 
   2030   return true;
   2031 }
   2032 
   2033 static inline bool areJTsAllowed(const TargetLowering &TLI) {
   2034   return !TLI.getTargetMachine().Options.DisableJumpTables &&
   2035           (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
   2036            TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
   2037 }
   2038 
   2039 static APInt ComputeRange(const APInt &First, const APInt &Last) {
   2040   uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
   2041   APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
   2042   return (LastExt - FirstExt + 1ULL);
   2043 }
   2044 
   2045 /// handleJTSwitchCase - Emit jumptable for current switch case range
        ///
        /// Attempts to lower the case range described by CR as a jump table.
        /// Returns false (emitting nothing) when jump tables are not allowed for
        /// the target, when the summed size of the cases is below 4, or when the
        /// density (summed case size / value range) is below 40%. Otherwise it
        /// creates a new block that holds the jump-table dispatch, wires up the
        /// machine-CFG successors, records a JumpTableBlock in JTCases and
        /// returns true.
        ///
        /// \param CR        The case range to lower and the block it starts in.
        /// \param WorkList  Not referenced by this handler.
        /// \param SV        The value being switched on.
        /// \param Default   Destination used for table slots that match no case.
        /// \param SwitchBB  The block containing the switch; when it is CR.CaseBB
        ///                  the jump table header is emitted immediately.
   2046 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
   2047                                              CaseRecVector &WorkList,
   2048                                              const Value *SV,
   2049                                              MachineBasicBlock *Default,
   2050                                              MachineBasicBlock *SwitchBB) {
   2051   Case& FrontCase = *CR.Range.first;
   2052   Case& BackCase  = *(CR.Range.second-1);
   2053
   2054   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
   2055   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
   2056
          // TSize accumulates size() of every case in the range.
   2057   APInt TSize(First.getBitWidth(), 0);
   2058   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
   2059     TSize += I->size();
   2060
   2061   if (!areJTsAllowed(TLI) || TSize.ult(4))
   2062     return false;
   2063
   2064   APInt Range = ComputeRange(First, Last);
   2065   // The density is TSize / Range. Require at least 40%.
   2066   // It should not be possible for IntTSize to saturate for sane code, but make
   2067   // sure we handle Range saturation correctly.
          // Both values are clamped to UINT64_MAX/10 so the multiplications by 10
          // and 4 below cannot wrap a uint64_t.
   2068   uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
   2069   uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
   2070   if (IntTSize * 10 < IntRange * 4)
   2071     return false;
   2072
   2073   DEBUG(dbgs() << "Lowering jump table\n"
   2074                << "First entry: " << First << ". Last entry: " << Last << '\n'
   2075                << "Range: " << Range << ". Size: " << TSize << ".\n\n");
   2076
   2077   // Get the MachineFunction which holds the current MBB.  This is used when
   2078   // inserting any additional MBBs necessary to represent the switch.
   2079   MachineFunction *CurMF = FuncInfo.MF;
   2080
   2081   // Figure out which block is immediately after the current one.
   2082   MachineFunction::iterator BBI = CR.CaseBB;
   2083   ++BBI;
   2084
   2085   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
   2086
   2087   // Create a new basic block to hold the code for loading the address
   2088   // of the jump table, and jumping to it.  Update successor information;
   2089   // we will either branch to the default case for the switch, or the jump
   2090   // table.
   2091   MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
   2092   CurMF->insert(BBI, JumpTableBB);
   2093
   2094   addSuccessorWithWeight(CR.CaseBB, Default);
   2095   addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
   2096
   2097   // Build a vector of destination BBs, corresponding to each target
   2098   // of the jump table. If the value of the jump table slot corresponds to
   2099   // a case statement, push the case's BB onto the vector, otherwise, push
   2100   // the default BB.
   2101   std::vector<MachineBasicBlock*> DestBBs;
   2102   APInt TEI = First;
          // TEI walks every value from First upwards; the case iterator I only
          // advances once TEI has reached the High bound of the current case, so
          // the loop ends right after the last case's High value is emitted.
   2103   for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
   2104     const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
   2105     const APInt &High = cast<ConstantInt>(I->High)->getValue();
   2106
   2107     if (Low.sle(TEI) && TEI.sle(High)) {
   2108       DestBBs.push_back(I->BB);
   2109       if (TEI==High)
   2110         ++I;
   2111     } else {
   2112       DestBBs.push_back(Default);
   2113     }
   2114   }
   2115
   2116   // Update successor info. Add one edge to each unique successor.
          // SuccsHandled is indexed by MBB number and marks blocks already added.
   2117   BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
   2118   for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
   2119          E = DestBBs.end(); I != E; ++I) {
   2120     if (!SuccsHandled[(*I)->getNumber()]) {
   2121       SuccsHandled[(*I)->getNumber()] = true;
   2122       addSuccessorWithWeight(JumpTableBB, *I);
   2123     }
   2124   }
   2125
   2126   // Create a jump table index for this jump table.
   2127   unsigned JTEncoding = TLI.getJumpTableEncoding();
   2128   unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
   2129                        ->createJumpTableIndex(DestBBs);
   2130
   2131   // Set the jump table information so that we can codegen it as a second
   2132   // MachineBasicBlock
   2133   JumpTable JT(-1U, JTI, JumpTableBB, Default);
   2134   JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
          // When the range starts in the switch's own block the header is emitted
          // right away; the same condition is passed to JTH above.
   2135   if (CR.CaseBB == SwitchBB)
   2136     visitJumpTableHeader(JT, JTH, SwitchBB);
   2137
   2138   JTCases.push_back(JumpTableBlock(JTH, JT));
   2139   return true;
   2140 }
   2141 
   2142 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into
   2143 /// 2 subtrees.
        ///
        /// Picks a pivot case inside CR.Range, emits (or queues) a CaseBlock that
        /// compares SV against the pivot's Low value with SETLT, and pushes the
        /// left and right sub-ranges onto WorkList unless a sub-range can be
        /// reached directly (see the two shortcuts below). Always returns true.
        ///
        /// \param CR        The case range to split, with its known bounds
        ///                  CR.GE / CR.LT and the block it starts in.
        /// \param WorkList  Receives CaseRecs for the sub-ranges still to lower.
        /// \param SV        The value being switched on.
        /// \param Default   Not referenced by this handler.
        /// \param SwitchBB  The block containing the switch; when it is CR.CaseBB
        ///                  the comparison is emitted immediately.
   2144 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
   2145                                                   CaseRecVector& WorkList,
   2146                                                   const Value* SV,
   2147                                                   MachineBasicBlock *Default,
   2148                                                   MachineBasicBlock *SwitchBB) {
   2149   // Get the MachineFunction which holds the current MBB.  This is used when
   2150   // inserting any additional MBBs necessary to represent the switch.
   2151   MachineFunction *CurMF = FuncInfo.MF;
   2152
   2153   // Figure out which block is immediately after the current one.
   2154   MachineFunction::iterator BBI = CR.CaseBB;
   2155   ++BBI;
   2156
   2157   Case& FrontCase = *CR.Range.first;
   2158   Case& BackCase  = *(CR.Range.second-1);
   2159   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
   2160
   2161   // Size is the number of Cases represented by this range.
   2162   unsigned Size = CR.Range.second - CR.Range.first;
   2163
   2164   const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
   2165   const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
   2166   double FMetric = 0;
          // Start from the middle case; the loop below may replace it.
   2167   CaseItr Pivot = CR.Range.first + Size/2;
   2168
   2169   // Select optimal pivot, maximizing sum density of LHS and RHS. This will
   2170   // (heuristically) allow us to emit JumpTable's later.
   2171   APInt TSize(First.getBitWidth(), 0);
   2172   for (CaseItr I = CR.Range.first, E = CR.Range.second;
   2173        I!=E; ++I)
   2174     TSize += I->size();
   2175
          // LSize / RSize track the summed case size on each side of the candidate
          // split point between I and J; they are updated at the end of each step.
   2176   APInt LSize = FrontCase.size();
   2177   APInt RSize = TSize-LSize;
   2178   DEBUG(dbgs() << "Selecting best pivot: \n"
   2179                << "First: " << First << ", Last: " << Last <<'\n'
   2180                << "LSize: " << LSize << ", RSize: " << RSize << '\n');
   2181   for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
   2182        J!=E; ++I, ++J) {
   2183     const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
   2184     const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
   2185     APInt Range = ComputeRange(LEnd, RBegin);
   2186     assert((Range - 2ULL).isNonNegative() &&
   2187            "Invalid case distance");
   2188     // Use volatile double here to avoid excess precision issues on some hosts,
   2189     // e.g. that use 80-bit X87 registers.
   2190     volatile double LDensity =
   2191        (double)LSize.roundToDouble() /
   2192                            (LEnd - First + 1ULL).roundToDouble();
   2193     volatile double RDensity =
   2194       (double)RSize.roundToDouble() /
   2195                            (Last - RBegin + 1ULL).roundToDouble();
            // The metric weights the combined density by log2 of the gap between
            // the two neighbouring cases.
   2196     double Metric = Range.logBase2()*(LDensity+RDensity);
   2197     // Should always split in some non-trivial place
   2198     DEBUG(dbgs() <<"=>Step\n"
   2199                  << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
   2200                  << "LDensity: " << LDensity
   2201                  << ", RDensity: " << RDensity << '\n'
   2202                  << "Metric: " << Metric << '\n');
   2203     if (FMetric < Metric) {
   2204       Pivot = J;
   2205       FMetric = Metric;
   2206       DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
   2207     }
   2208
   2209     LSize += J->size();
   2210     RSize -= J->size();
   2211   }
   2212   if (areJTsAllowed(TLI)) {
   2213     // If our case is dense we *really* should handle it earlier!
   2214     assert((FMetric > 0) && "Should handle dense range earlier!");
   2215   } else {
            // Without jump tables the density heuristic is discarded and the
            // range is split at the middle case.
   2216     Pivot = CR.Range.first + Size/2;
   2217   }
   2218
   2219   CaseRange LHSR(CR.Range.first, Pivot);
   2220   CaseRange RHSR(Pivot, CR.Range.second);
   2221   const Constant *C = Pivot->Low;
   2222   MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
   2223
   2224   // We know that we branch to the LHS if the Value being switched on is
   2225   // less than the Pivot value, C.  We use this to optimize our binary
   2226   // tree a bit, by recognizing that if SV is greater than or equal to the
   2227   // LHS's Case Value, and that Case Value is exactly one less than the
   2228   // Pivot's Value, then we can branch directly to the LHS's Target,
   2229   // rather than creating a leaf node for it.
   2230   if ((LHSR.second - LHSR.first) == 1 &&
   2231       LHSR.first->High == CR.GE &&
   2232       cast<ConstantInt>(C)->getValue() ==
   2233       (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
   2234     TrueBB = LHSR.first->BB;
   2235   } else {
   2236     TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
   2237     CurMF->insert(BBI, TrueBB);
   2238     WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
   2239
   2240     // Put SV in a virtual register to make it available from the new blocks.
   2241     ExportFromCurrentBlock(SV);
   2242   }
   2243
   2244   // Similar to the optimization above, if the Value being switched on is
   2245   // known to be less than the Constant CR.LT, and the current Case Value
   2246   // is CR.LT - 1, then we can branch directly to the target block for
   2247   // the current Case Value, rather than emitting a RHS leaf node for it.
   2248   if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
   2249       cast<ConstantInt>(RHSR.first->Low)->getValue() ==
   2250       (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
   2251     FalseBB = RHSR.first->BB;
   2252   } else {
   2253     FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
   2254     CurMF->insert(BBI, FalseBB);
   2255     WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
   2256
   2257     // Put SV in a virtual register to make it available from the new blocks.
   2258     ExportFromCurrentBlock(SV);
   2259   }
   2260
   2261   // Create a CaseBlock record representing a conditional branch to
   2262   // the LHS node if the value being switched on SV is less than C.
   2263   // Otherwise, branch to the RHS node.
   2264   CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
   2265
          // Emit the comparison now if it belongs to the switch's own block;
          // otherwise queue it in SwitchCases.
   2266   if (CR.CaseBB == SwitchBB)
   2267     visitSwitchCase(CB, SwitchBB);
   2268   else
   2269     SwitchCases.push_back(CB);
   2270
   2271   return true;
   2272 }
   2273 
   2274 /// handleBitTestsSwitchCase - if current case range has few destination and
   2275 /// range span less, than machine word bitwidth, encode case range into series
   2276 /// of masks and emit bit tests with these masks.
        ///
        /// Returns false without emitting anything when SHL is not legal for the
        /// pointer type, when the range has more than 3 unique destinations, when
        /// the span of case values is not smaller than the pointer width in bits,
        /// or when the number of compares is too small for the number of
        /// destinations (fewer than 3 for one, 5 for two, 6 for three).
        /// Otherwise it builds one mask per destination, creates one new block
        /// per mask, records a BitTestBlock in BitTestCases and returns true.
        ///
        /// \param CR        The case range to lower and the block it starts in.
        /// \param WorkList  Not referenced by this handler.
        /// \param SV        The value being switched on.
        /// \param Default   Default destination stored in the BitTestBlock.
        /// \param SwitchBB  The block containing the switch; when it is CR.CaseBB
        ///                  the bit test header is emitted immediately.
   2277 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
   2278                                                    CaseRecVector& WorkList,
   2279                                                    const Value* SV,
   2280                                                    MachineBasicBlock* Default,
   2281                                                    MachineBasicBlock *SwitchBB){
   2282   EVT PTy = TLI.getPointerTy();
   2283   unsigned IntPtrBits = PTy.getSizeInBits();
   2284
   2285   Case& FrontCase = *CR.Range.first;
   2286   Case& BackCase  = *(CR.Range.second-1);
   2287
   2288   // Get the MachineFunction which holds the current MBB.  This is used when
   2289   // inserting any additional MBBs necessary to represent the switch.
   2290   MachineFunction *CurMF = FuncInfo.MF;
   2291
   2292   // If target does not have legal shift left, do not emit bit tests at all.
   2293   if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
   2294     return false;
   2295
   2296   size_t numCmps = 0;
   2297   for (CaseItr I = CR.Range.first, E = CR.Range.second;
   2298        I!=E; ++I) {
   2299     // Single case counts one, case range - two.
   2300     numCmps += (I->Low == I->High ? 1 : 2);
   2301   }
   2302
   2303   // Count unique destinations
   2304   SmallSet<MachineBasicBlock*, 4> Dests;
   2305   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
   2306     Dests.insert(I->BB);
   2307     if (Dests.size() > 3)
   2308       // Don't bother the code below, if there are too much unique destinations
   2309       return false;
   2310   }
   2311   DEBUG(dbgs() << "Total number of unique destinations: "
   2312         << Dests.size() << '\n'
   2313         << "Total number of comparisons: " << numCmps << '\n');
   2314
   2315   // Compute span of values.
   2316   const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
   2317   const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
   2318   APInt cmpRange = maxValue - minValue;
   2319
   2320   DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
   2321                << "Low bound: " << minValue << '\n'
   2322                << "High bound: " << maxValue << '\n');
   2323
          // Bail out if the span does not fit in a pointer-sized mask, or if the
          // compare count does not reach the threshold for this many destinations.
   2324   if (cmpRange.uge(IntPtrBits) ||
   2325       (!(Dests.size() == 1 && numCmps >= 3) &&
   2326        !(Dests.size() == 2 && numCmps >= 5) &&
   2327        !(Dests.size() >= 3 && numCmps >= 6)))
   2328     return false;
   2329
   2330   DEBUG(dbgs() << "Emitting bit tests\n");
   2331   APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
   2332
   2333   // Optimize the case where all the case values fit in a
   2334   // word without having to subtract minValue. In this case,
   2335   // we can optimize away the subtraction.
   2336   if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
   2337     cmpRange = maxValue;
   2338   } else {
   2339     lowBound = minValue;
   2340   }
   2341
   2342   CaseBitsVector CasesBits;
   2343   unsigned i, count = 0;
   2344
          // Build one CaseBits entry per unique destination and set one mask bit
          // for every case value (relative to lowBound) that targets it.
   2345   for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
   2346     MachineBasicBlock* Dest = I->BB;
            // Linear search for an existing entry; i == count means "not found".
   2347     for (i = 0; i < count; ++i)
   2348       if (Dest == CasesBits[i].BB)
   2349         break;
   2350
   2351     if (i == count) {
   2352       assert((count < 3) && "Too much destinations to test!");
   2353       CasesBits.push_back(CaseBits(0, Dest, 0));
   2354       count++;
   2355     }
   2356
   2357     const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
   2358     const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
   2359
   2360     uint64_t lo = (lowValue - lowBound).getZExtValue();
   2361     uint64_t hi = (highValue - lowBound).getZExtValue();
   2362
   2363     for (uint64_t j = lo; j <= hi; j++) {
   2364       CasesBits[i].Mask |=  1ULL << j;
   2365       CasesBits[i].Bits++;
   2366     }
   2367
   2368   }
   2369   std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
   2370
   2371   BitTestInfo BTC;
   2372
   2373   // Figure out which block is immediately after the current one.
   2374   MachineFunction::iterator BBI = CR.CaseBB;
   2375   ++BBI;
   2376
   2377   const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
   2378
   2379   DEBUG(dbgs() << "Cases:\n");
   2380   for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
   2381     DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
   2382                  << ", Bits: " << CasesBits[i].Bits
   2383                  << ", BB: " << CasesBits[i].BB << '\n');
   2384
            // One new machine block per mask, inserted after CR.CaseBB.
   2385     MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
   2386     CurMF->insert(BBI, CaseBB);
   2387     BTC.push_back(BitTestCase(CasesBits[i].Mask,
   2388                               CaseBB,
   2389                               CasesBits[i].BB));
   2390
   2391     // Put SV in a virtual register to make it available from the new blocks.
   2392     ExportFromCurrentBlock(SV);
   2393   }
   2394
   2395   BitTestBlock BTB(lowBound, cmpRange, SV,
   2396                    -1U, MVT::Other, (CR.CaseBB == SwitchBB),
   2397                    CR.CaseBB, Default, BTC);
   2398
          // Emit the header now if the range starts in the switch's own block;
          // the block is recorded in BitTestCases either way.
   2399   if (CR.CaseBB == SwitchBB)
   2400     visitBitTestHeader(BTB, SwitchBB);
   2401
   2402   BitTestCases.push_back(BTB);
   2403
   2404   return true;
   2405 }
   2406 
   2407 /// Clusterify - Transform simple list of Cases into list of CaseRange's
        ///
        /// Appends one single-value Case per switch case of SI to Cases, sorts
        /// them with CaseCmp, then merges neighbouring entries whose values are
        /// consecutive and whose destination block is the same.
        ///
        /// \param Cases  Output vector; entries are appended, sorted and merged
        ///               in place.
        /// \param SI     The switch instruction whose cases are collected.
        /// \returns The number of compares needed for the resulting clusters:
        ///          one per single-value cluster, two per range.
   2408 size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
   2409                                        const SwitchInst& SI) {
   2410   size_t numCmps = 0;
   2411
   2412   BranchProbabilityInfo *BPI = FuncInfo.BPI;
   2413   // Start with "simple" cases
   2414   for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
   2415        i != e; ++i) {
   2416     const BasicBlock *SuccBB = i.getCaseSuccessor();
   2417     MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
   2418
            // Without branch probability info the extra weight is 0.
   2419     uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
   2420
   2421     Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
   2422                          SMBB, ExtraWeight));
   2423   }
   2424   std::sort(Cases.begin(), Cases.end(), CaseCmp());
   2425
   2426   // Merge case into clusters
   2427   if (Cases.size() >= 2)
   2428     // Must recompute end() each iteration because it may be
   2429     // invalidated by erase if we hold on to it
   2430     for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
   2431          J != Cases.end(); ) {
   2432       const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
   2433       const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
   2434       MachineBasicBlock* nextBB = J->BB;
   2435       MachineBasicBlock* currentBB = I->BB;
   2436
   2437       // If the two neighboring cases go to the same destination, merge them
   2438       // into a single case.
   2439       if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
   2440         I->High = J->High;
                // erase() returns the element after the removed one, so J keeps
                // pointing at the next candidate while I stays on the cluster.
   2441         J = Cases.erase(J);
   2442
                // This BPI shadows the one declared at the top of the function;
                // both are read from FuncInfo.BPI.
                // NOTE(review): only the BPI edge weight is updated here; the
                // merged Case's ExtraWeight field is left as it was. Confirm
                // that this is intended.
   2443         if (BranchProbabilityInfo *BPI = FuncInfo.BPI) {
   2444           uint32_t CurWeight = currentBB->getBasicBlock() ?
   2445             BPI->getEdgeWeight(SI.getParent(), currentBB->getBasicBlock()) : 16;
   2446           uint32_t NextWeight = nextBB->getBasicBlock() ?
   2447             BPI->getEdgeWeight(SI.getParent(), nextBB->getBasicBlock()) : 16;
   2448
   2449           BPI->setEdgeWeight(SI.getParent(), currentBB->getBasicBlock(),
   2450                              CurWeight + NextWeight);
   2451         }
   2452       } else {
                // No merge: advance both iterators by one.
   2453         I = J++;
   2454       }
   2455     }
   2456
          // The loop increment counts one compare per cluster; the body adds a
          // second one for ranges.
   2457   for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
   2458     if (I->Low != I->High)
   2459       // A range counts double, since it requires two compares.
   2460       ++numCmps;
   2461   }
   2462
   2463   return numCmps;
   2464 }
   2465 
   2466 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
   2467                                            MachineBasicBlock *Last) {
   2468   // Update JTCases.
   2469   for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
   2470     if (JTCases[i].first.HeaderBB == First)
   2471       JTCases[i].first.HeaderBB = Last;
   2472 
   2473   // Update BitTestCases.
   2474   for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
   2475     if (BitTestCases[i].Parent == First)
   2476       BitTestCases[i].Parent = Last;
   2477 }
   2478 
   2479 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
          // Lowers a switch instruction. A switch with no cases becomes a branch
          // to the default block. Otherwise the cases are clustered and a
          // worklist of case ranges is processed, trying each lowering strategy
          // in turn: bit tests, a short run of compares, a jump table, and
          // finally a binary-tree split that pushes sub-ranges back onto the
          // worklist.
   2480   MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
   2481
   2482   // Figure out which block is immediately after the current one.
          // NOTE(review): NextBlock is initialised to null and never reassigned
          // in this function, so the fall-through check below compares Default
          // against null.
   2483   MachineBasicBlock *NextBlock = 0;
   2484   MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
   2485
   2486   // If there is only the default destination, branch to it if it is not the
   2487   // next basic block.  Otherwise, just fall through.
   2488   if (!SI.getNumCases()) {
   2489     // Update machine-CFG edges.
   2490
   2491     // If this is not a fall-through branch, emit the branch.
   2492     SwitchMBB->addSuccessor(Default);
   2493     if (Default != NextBlock)
   2494       DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
   2495                               MVT::Other, getControlRoot(),
   2496                               DAG.getBasicBlock(Default)));
   2497
   2498     return;
   2499   }
   2500
   2501   // If there are any non-default case statements, create a vector of Cases
   2502   // representing each one, and sort the vector so that we can efficiently
   2503   // create a binary search tree from them.
   2504   CaseVector Cases;
   2505   size_t numCmps = Clusterify(Cases, SI);
   2506   DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
   2507                << ". Total compares: " << numCmps << '\n');
          // numCmps is only used in the DEBUG output above; the cast silences
          // the unused-variable warning when DEBUG compiles to nothing.
   2508   (void)numCmps;
   2509
   2510   // Get the Value to be switched on and default basic blocks, which will be
   2511   // inserted into CaseBlock records, representing basic blocks in the binary
   2512   // search tree.
   2513   const Value *SV = SI.getCondition();
   2514
   2515   // Push the initial CaseRec onto the worklist
   2516   CaseRecVector WorkList;
   2517   WorkList.push_back(CaseRec(SwitchMBB,0,0,
   2518                              CaseRange(Cases.begin(),Cases.end())));
   2519
          // Each handler returns true when it has lowered the range.
   2520   while (!WorkList.empty()) {
   2521     // Grab a record representing a case range to process off the worklist
   2522     CaseRec CR = WorkList.back();
   2523     WorkList.pop_back();
   2524
   2525     if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
   2526       continue;
   2527
   2528     // If the range has few cases (two or less) emit a series of specific
   2529     // tests.
   2530     if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
   2531       continue;
   2532
   2533     // If the switch has more than 5 blocks, and at least 40% dense, and the
   2534     // target supports indirect branches, then emit a jump table rather than
   2535     // lowering the switch to a binary tree of conditional branches.
            // NOTE(review): handleJTSwitchCase rejects ranges whose summed case
            // size is below 4, which does not match "more than 5 blocks" above.
   2536     if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
   2537       continue;
   2538
   2539     // Emit binary tree. We need to pick a pivot, and push left and right ranges
   2540     // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
   2541     handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
   2542   }
   2543 }
   2544 
   2545 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
   2546   MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
   2547 
   2548   // Update machine-CFG edges with unique successors.
   2549   SmallVector<BasicBlock*, 32> succs;
   2550   succs.reserve(I.getNumSuccessors());
   2551   for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
   2552     succs.push_back(I.getSuccessor(i));
   2553   array_pod_sort(succs.begin(), succs.end());
   2554   succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
   2555   for (unsigned i = 0, e = succs.size(); i != e; ++i) {
   2556     MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
   2557     addSuccessorWithWeight(IndirectBrMBB, Succ);
   2558   }
   2559 
   2560   DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
   2561                           MVT::Other, getControlRoot(),
   2562                           getValue(I.getAddress())));
   2563 }
   2564 
   2565 void SelectionDAGBuilder::visitFSub(const User &I) {
   2566   // -0.0 - X --> fneg
   2567   Type *Ty = I.getType();
   2568   if (isa<Constant>(I.getOperand(0)) &&
   2569       I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
   2570     SDValue Op2 = getValue(I.getOperand(1));
   2571     setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
   2572                              Op2.getValueType(), Op2));
   2573     return;
   2574   }
   2575 
   2576   visitBinary(I, ISD::FSUB);
   2577 }
   2578 
   2579 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
   2580   SDValue Op1 = getValue(I.getOperand(0));
   2581   SDValue Op2 = getValue(I.getOperand(1));
   2582   setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
   2583                            Op1.getValueType(), Op1, Op2));
   2584 }
   2585 
   2586 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
   2587   SDValue Op1 = getValue(I.getOperand(0));
   2588   SDValue Op2 = getValue(I.getOperand(1));
   2589 
   2590   MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
   2591 
   2592   // Coerce the shift amount to the right type if we can.
   2593   if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
   2594     unsigned ShiftSize = ShiftTy.getSizeInBits();
   2595     unsigned Op2Size = Op2.getValueType().getSizeInBits();
   2596     DebugLoc DL = getCurDebugLoc();
   2597 
   2598     // If the operand is smaller than the shift count type, promote it.
   2599     if (ShiftSize > Op2Size)
   2600       Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
   2601 
   2602     // If the operand is larger than the shift count type but the shift
   2603     // count type has enough bits to represent any shift value, truncate
   2604     // it now. This is a common case and it exposes the truncate to
   2605     // optimization early.
   2606     else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
   2607       Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
   2608     // Otherwise we'll need to temporarily settle for some other convenient
   2609     // type.  Type legalization will make adjustments once the shiftee is split.
   2610     else
   2611       Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
   2612   }
   2613 
   2614   setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
   2615                            Op1.getValueType(), Op1, Op2));
   2616 }
   2617 
   2618 void SelectionDAGBuilder::visitSDiv(const User &I) {
   2619   SDValue Op1 = getValue(I.getOperand(0));
   2620   SDValue Op2 = getValue(I.getOperand(1));
   2621 
   2622   // Turn exact SDivs into multiplications.
   2623   // FIXME: This should be in DAGCombiner, but it doesn't have access to the
   2624   // exact bit.
   2625   if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
   2626       !isa<ConstantSDNode>(Op1) &&
   2627       isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
   2628     setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
   2629   else
   2630     setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
   2631                              Op1, Op2));
   2632 }
   2633 
   2634 void SelectionDAGBuilder::visitICmp(const User &I) {
   2635   ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
   2636   if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
   2637     predicate = IC->getPredicate();
   2638   else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
   2639     predicate = ICmpInst::Predicate(IC->getPredicate());
   2640   SDValue Op1 = getValue(I.getOperand(0));
   2641   SDValue Op2 = getValue(I.getOperand(1));
   2642   ISD::CondCode Opcode = getICmpCondCode(predicate);
   2643 
   2644   EVT DestVT = TLI.getValueType(I.getType());
   2645   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
   2646 }
   2647 
   2648 void SelectionDAGBuilder::visitFCmp(const User &I) {
   2649   FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
   2650   if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
   2651     predicate = FC->getPredicate();
   2652   else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
   2653     predicate = FCmpInst::Predicate(FC->getPredicate());
   2654   SDValue Op1 = getValue(I.getOperand(0));
   2655   SDValue Op2 = getValue(I.getOperand(1));
   2656   ISD::CondCode Condition = getFCmpCondCode(predicate);
   2657   if (TM.Options.NoNaNsFPMath)
   2658     Condition = getFCmpCodeWithoutNaN(Condition);
   2659   EVT DestVT = TLI.getValueType(I.getType());
   2660   setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
   2661 }
   2662 
   2663 void SelectionDAGBuilder::visitSelect(const User &I) {
   2664   SmallVector<EVT, 4> ValueVTs;
   2665   ComputeValueVTs(TLI, I.getType(), ValueVTs);
   2666   unsigned NumValues = ValueVTs.size();
   2667   if (NumValues == 0) return;
   2668 
   2669   SmallVector<SDValue, 4> Values(NumValues);
   2670   SDValue Cond     = getValue(I.getOperand(0));
   2671   SDValue TrueVal  = getValue(I.getOperand(1));
   2672   SDValue FalseVal = getValue(I.getOperand(2));
   2673   ISD::NodeType OpCode = Cond.getValueType().isVector() ?
   2674     ISD::VSELECT : ISD::SELECT;
   2675 
   2676   for (unsigned i = 0; i != NumValues; ++i)
   2677     Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
   2678                             TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
   2679                             Cond,
   2680                             SDValue(TrueVal.getNode(),
   2681                                     TrueVal.getResNo() + i),
   2682                             SDValue(FalseVal.getNode(),
   2683                                     FalseVal.getResNo() + i));
   2684 
   2685   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   2686                            DAG.getVTList(&ValueVTs[0], NumValues),
   2687                            &Values[0], NumValues));
   2688 }
   2689 
   2690 void SelectionDAGBuilder::visitTrunc(const User &I) {
   2691   // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
   2692   SDValue N = getValue(I.getOperand(0));
   2693   EVT DestVT = TLI.getValueType(I.getType());
   2694   setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
   2695 }
   2696 
   2697 void SelectionDAGBuilder::visitZExt(const User &I) {
   2698   // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   2699   // ZExt also can't be a cast to bool for same reason. So, nothing much to do
   2700   SDValue N = getValue(I.getOperand(0));
   2701   EVT DestVT = TLI.getValueType(I.getType());
   2702   setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
   2703 }
   2704 
   2705 void SelectionDAGBuilder::visitSExt(const User &I) {
   2706   // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
   2707   // SExt also can't be a cast to bool for same reason. So, nothing much to do
   2708   SDValue N = getValue(I.getOperand(0));
   2709   EVT DestVT = TLI.getValueType(I.getType());
   2710   setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
   2711 }
   2712 
   2713 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
   2714   // FPTrunc is never a no-op cast, no need to check
   2715   SDValue N = getValue(I.getOperand(0));
   2716   EVT DestVT = TLI.getValueType(I.getType());
   2717   setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
   2718                            DestVT, N,
   2719                            DAG.getTargetConstant(0, TLI.getPointerTy())));
   2720 }
   2721 
   2722 void SelectionDAGBuilder::visitFPExt(const User &I){
   2723   // FPExt is never a no-op cast, no need to check
   2724   SDValue N = getValue(I.getOperand(0));
   2725   EVT DestVT = TLI.getValueType(I.getType());
   2726   setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
   2727 }
   2728 
   2729 void SelectionDAGBuilder::visitFPToUI(const User &I) {
   2730   // FPToUI is never a no-op cast, no need to check
   2731   SDValue N = getValue(I.getOperand(0));
   2732   EVT DestVT = TLI.getValueType(I.getType());
   2733   setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
   2734 }
   2735 
   2736 void SelectionDAGBuilder::visitFPToSI(const User &I) {
   2737   // FPToSI is never a no-op cast, no need to check
   2738   SDValue N = getValue(I.getOperand(0));
   2739   EVT DestVT = TLI.getValueType(I.getType());
   2740   setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
   2741 }
   2742 
   2743 void SelectionDAGBuilder::visitUIToFP(const User &I) {
   2744   // UIToFP is never a no-op cast, no need to check
   2745   SDValue N = getValue(I.getOperand(0));
   2746   EVT DestVT = TLI.getValueType(I.getType());
   2747   setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
   2748 }
   2749 
   2750 void SelectionDAGBuilder::visitSIToFP(const User &I){
   2751   // SIToFP is never a no-op cast, no need to check
   2752   SDValue N = getValue(I.getOperand(0));
   2753   EVT DestVT = TLI.getValueType(I.getType());
   2754   setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
   2755 }
   2756 
   2757 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
   2758   // What to do depends on the size of the integer and the size of the pointer.
   2759   // We can either truncate, zero extend, or no-op, accordingly.
   2760   SDValue N = getValue(I.getOperand(0));
   2761   EVT DestVT = TLI.getValueType(I.getType());
   2762   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
   2763 }
   2764 
   2765 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
   2766   // What to do depends on the size of the integer and the size of the pointer.
   2767   // We can either truncate, zero extend, or no-op, accordingly.
   2768   SDValue N = getValue(I.getOperand(0));
   2769   EVT DestVT = TLI.getValueType(I.getType());
   2770   setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
   2771 }
   2772 
   2773 void SelectionDAGBuilder::visitBitCast(const User &I) {
   2774   SDValue N = getValue(I.getOperand(0));
   2775   EVT DestVT = TLI.getValueType(I.getType());
   2776 
   2777   // BitCast assures us that source and destination are the same size so this is
   2778   // either a BITCAST or a no-op.
   2779   if (DestVT != N.getValueType())
   2780     setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
   2781                              DestVT, N)); // convert types.
   2782   else
   2783     setValue(&I, N);            // noop cast.
   2784 }
   2785 
   2786 void SelectionDAGBuilder::visitInsertElement(const User &I) {
   2787   SDValue InVec = getValue(I.getOperand(0));
   2788   SDValue InVal = getValue(I.getOperand(1));
   2789   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
   2790                               TLI.getPointerTy(),
   2791                               getValue(I.getOperand(2)));
   2792   setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
   2793                            TLI.getValueType(I.getType()),
   2794                            InVec, InVal, InIdx));
   2795 }
   2796 
   2797 void SelectionDAGBuilder::visitExtractElement(const User &I) {
   2798   SDValue InVec = getValue(I.getOperand(0));
   2799   SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
   2800                               TLI.getPointerTy(),
   2801                               getValue(I.getOperand(1)));
   2802   setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
   2803                            TLI.getValueType(I.getType()), InVec, InIdx));
   2804 }
   2805 
   2806 // Utility for visitShuffleVector - Return true if every element in Mask,
   2807 // begining from position Pos and ending in Pos+Size, falls within the
   2808 // specified sequential range [L, L+Pos). or is undef.
   2809 static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
   2810                                 unsigned Pos, unsigned Size, int Low) {
   2811   for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
   2812     if (Mask[i] >= 0 && Mask[i] != Low)
   2813       return false;
   2814   return true;
   2815 }
   2816 
/// Lower a shufflevector instruction. Four strategies are tried in order:
///  1. Mask length equals the source vector length: emit a vector shuffle
///     directly.
///  2. Mask is longer than the sources and a multiple of their length: emit a
///     plain CONCAT_VECTORS if the mask just concatenates the two sources,
///     otherwise pad both sources with undef vectors up to the mask length and
///     shuffle the padded vectors with a readjusted mask.
///  3. Mask is shorter than the sources: if the elements used from each
///     source fit in a mask-length window starting at a multiple of the mask
///     length, extract those subvectors and shuffle them.
///  4. Otherwise: extract every selected element individually and assemble the
///     result with BUILD_VECTOR.
/// Negative mask entries are treated as undef throughout.
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
  SDValue Src1 = getValue(I.getOperand(0));
  SDValue Src2 = getValue(I.getOperand(1));

  SmallVector<int, 8> Mask;
  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
  unsigned MaskNumElts = Mask.size();

  EVT VT = TLI.getValueType(I.getType());
  EVT SrcVT = Src1.getValueType();
  unsigned SrcNumElts = SrcVT.getVectorNumElements();

  // Case 1: lengths already agree, so the mask can be used unchanged.
  if (SrcNumElts == MaskNumElts) {
    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
                                      &Mask[0]));
    return;
  }

  // Normalize the shuffle vector since mask and vector length don't match.
  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
    // Mask is longer than the source vectors and is a multiple of the source
    // vectors.  We can use concatenate vector to make the mask and vectors
    // lengths match.
    if (SrcNumElts*2 == MaskNumElts) {
      // First check for Src1 in low and Src2 in high
      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
        // The shuffle is concatenating two vectors together.
        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
                                 VT, Src1, Src2));
        return;
      }
      // Then check for Src2 in low and Src1 in high
      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
        // The shuffle is concatenating two vectors together.
        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
                                 VT, Src2, Src1));
        return;
      }
    }

    // Pad both vectors with undefs to make them the same length as the mask.
    unsigned NumConcat = MaskNumElts / SrcNumElts;
    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
    SDValue UndefVal = DAG.getUNDEF(SrcVT);

    // Operand lists for the concatenations: the real source first, followed by
    // NumConcat-1 undef vectors of the source type.
    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
    MOps1[0] = Src1;
    MOps2[0] = Src2;

    // A source that is itself undef is replaced by an undef of the result
    // type instead of a concatenation.
    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
                                                  getCurDebugLoc(), VT,
                                                  &MOps1[0], NumConcat);
    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
                                                  getCurDebugLoc(), VT,
                                                  &MOps2[0], NumConcat);

    // Readjust mask for new input vector length.
    SmallVector<int, 8> MappedOps;
    for (unsigned i = 0; i != MaskNumElts; ++i) {
      int Idx = Mask[i];
      // Indices that referred to the second source move up by
      // MaskNumElts - SrcNumElts. The subtraction below is performed in
      // unsigned arithmetic and relies on wraparound to add that amount.
      if (Idx >= (int)SrcNumElts)
        Idx -= SrcNumElts - MaskNumElts;
      MappedOps.push_back(Idx);
    }

    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
                                      &MappedOps[0]));
    return;
  }

  if (SrcNumElts > MaskNumElts) {
    // Analyze the access pattern of the vector to see if we can extract
    // two subvectors and do the shuffle. The analysis is done by calculating
    // the range of elements the mask access on both vectors.
    int MinRange[2] = { static_cast<int>(SrcNumElts),
                        static_cast<int>(SrcNumElts)};
    int MaxRange[2] = {-1, -1};

    for (unsigned i = 0; i != MaskNumElts; ++i) {
      int Idx = Mask[i];
      unsigned Input = 0;
      // Undef entries do not constrain either range.
      if (Idx < 0)
        continue;

      // Translate the index into (source number, index within that source).
      if (Idx >= (int)SrcNumElts) {
        Input = 1;
        Idx -= SrcNumElts;
      }
      if (Idx > MaxRange[Input])
        MaxRange[Input] = Idx;
      if (Idx < MinRange[Input])
        MinRange[Input] = Idx;
    }

    // Check if the access is smaller than the vector size and can we find
    // a reasonable extract index.
    int RangeUse[2] = { -1, -1 };  // 0 = Unused, 1 = Extract, -1 = Can not
                                   // Extract.
    int StartIdx[2];  // StartIdx to extract from
    for (unsigned Input = 0; Input < 2; ++Input) {
      // Min/Max still hold their initial values: no mask entry used this
      // source.
      if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
        RangeUse[Input] = 0; // Unused
        StartIdx[Input] = 0;
        continue;
      }

      // Find a good start index that is a multiple of the mask length. Then
      // see if the rest of the elements are in range.
      StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
      if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
          StartIdx[Input] + MaskNumElts <= SrcNumElts)
        RangeUse[Input] = 1; // Extract from a multiple of the mask length.
    }

    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
      return;
    }
    // Both sources are either unused or extractable.
    if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
      // Extract appropriate subvector and generate a vector shuffle
      for (unsigned Input = 0; Input < 2; ++Input) {
        SDValue &Src = Input == 0 ? Src1 : Src2;
        if (RangeUse[Input] == 0)
          Src = DAG.getUNDEF(VT);
        else
          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
      }

      // Calculate new mask.
      SmallVector<int, 8> MappedOps;
      for (unsigned i = 0; i != MaskNumElts; ++i) {
        int Idx = Mask[i];
        if (Idx >= 0) {
          // First-source indices become relative to the extracted window;
          // second-source indices additionally land after the MaskNumElts
          // elements of the first extracted vector.
          if (Idx < (int)SrcNumElts)
            Idx -= StartIdx[0];
          else
            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
        }
        MappedOps.push_back(Idx);
      }

      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
                                        &MappedOps[0]));
      return;
    }
  }

  // We can't use either concat vectors or extract subvectors so fall back to
  // replacing the shuffle with per-element extracts feeding a build vector.
  EVT EltVT = VT.getVectorElementType();
  EVT PtrVT = TLI.getPointerTy();
  SmallVector<SDValue,8> Ops;
  for (unsigned i = 0; i != MaskNumElts; ++i) {
    int Idx = Mask[i];
    SDValue Res;

    if (Idx < 0) {
      Res = DAG.getUNDEF(EltVT);
    } else {
      // Select the source the index refers to, then rebase the index so it is
      // relative to that source.
      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;

      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
                        EltVT, Src, DAG.getConstant(Idx, PtrVT));
    }

    Ops.push_back(Res);
  }

  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
                           VT, &Ops[0], Ops.size()));
}
   2995 
   2996 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
   2997   const Value *Op0 = I.getOperand(0);
   2998   const Value *Op1 = I.getOperand(1);
   2999   Type *AggTy = I.getType();
   3000   Type *ValTy = Op1->getType();
   3001   bool IntoUndef = isa<UndefValue>(Op0);
   3002   bool FromUndef = isa<UndefValue>(Op1);
   3003 
   3004   unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
   3005 
   3006   SmallVector<EVT, 4> AggValueVTs;
   3007   ComputeValueVTs(TLI, AggTy, AggValueVTs);
   3008   SmallVector<EVT, 4> ValValueVTs;
   3009   ComputeValueVTs(TLI, ValTy, ValValueVTs);
   3010 
   3011   unsigned NumAggValues = AggValueVTs.size();
   3012   unsigned NumValValues = ValValueVTs.size();
   3013   SmallVector<SDValue, 4> Values(NumAggValues);
   3014 
   3015   SDValue Agg = getValue(Op0);
   3016   unsigned i = 0;
   3017   // Copy the beginning value(s) from the original aggregate.
   3018   for (; i != LinearIndex; ++i)
   3019     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
   3020                 SDValue(Agg.getNode(), Agg.getResNo() + i);
   3021   // Copy values from the inserted value(s).
   3022   if (NumValValues) {
   3023     SDValue Val = getValue(Op1);
   3024     for (; i != LinearIndex + NumValValues; ++i)
   3025       Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
   3026                   SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
   3027   }
   3028   // Copy remaining value(s) from the original aggregate.
   3029   for (; i != NumAggValues; ++i)
   3030     Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
   3031                 SDValue(Agg.getNode(), Agg.getResNo() + i);
   3032 
   3033   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   3034                            DAG.getVTList(&AggValueVTs[0], NumAggValues),
   3035                            &Values[0], NumAggValues));
   3036 }
   3037 
   3038 void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
   3039   const Value *Op0 = I.getOperand(0);
   3040   Type *AggTy = Op0->getType();
   3041   Type *ValTy = I.getType();
   3042   bool OutOfUndef = isa<UndefValue>(Op0);
   3043 
   3044   unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
   3045 
   3046   SmallVector<EVT, 4> ValValueVTs;
   3047   ComputeValueVTs(TLI, ValTy, ValValueVTs);
   3048 
   3049   unsigned NumValValues = ValValueVTs.size();
   3050 
   3051   // Ignore a extractvalue that produces an empty object
   3052   if (!NumValValues) {
   3053     setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
   3054     return;
   3055   }
   3056 
   3057   SmallVector<SDValue, 4> Values(NumValValues);
   3058 
   3059   SDValue Agg = getValue(Op0);
   3060   // Copy out the selected value(s).
   3061   for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
   3062     Values[i - LinearIndex] =
   3063       OutOfUndef ?
   3064         DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
   3065         SDValue(Agg.getNode(), Agg.getResNo() + i);
   3066 
   3067   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   3068                            DAG.getVTList(&ValValueVTs[0], NumValValues),
   3069                            &Values[0], NumValValues));
   3070 }
   3071 
   3072 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
   3073   SDValue N = getValue(I.getOperand(0));
   3074   // Note that the pointer operand may be a vector of pointers. Take the scalar
   3075   // element which holds a pointer.
   3076   Type *Ty = I.getOperand(0)->getType()->getScalarType();
   3077 
   3078   for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
   3079        OI != E; ++OI) {
   3080     const Value *Idx = *OI;
   3081     if (StructType *StTy = dyn_cast<StructType>(Ty)) {
   3082       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
   3083       if (Field) {
   3084         // N = N + Offset
   3085         uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
   3086         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
   3087                         DAG.getIntPtrConstant(Offset));
   3088       }
   3089 
   3090       Ty = StTy->getElementType(Field);
   3091     } else {
   3092       Ty = cast<SequentialType>(Ty)->getElementType();
   3093 
   3094       // If this is a constant subscript, handle it quickly.
   3095       if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
   3096         if (CI->isZero()) continue;
   3097         uint64_t Offs =
   3098             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
   3099         SDValue OffsVal;
   3100         EVT PTy = TLI.getPointerTy();
   3101         unsigned PtrBits = PTy.getSizeInBits();
   3102         if (PtrBits < 64)
   3103           OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
   3104                                 TLI.getPointerTy(),
   3105                                 DAG.getConstant(Offs, MVT::i64));
   3106         else
   3107           OffsVal = DAG.getIntPtrConstant(Offs);
   3108 
   3109         N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
   3110                         OffsVal);
   3111         continue;
   3112       }
   3113 
   3114       // N = N + Idx * ElementSize;
   3115       APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
   3116                                 TD->getTypeAllocSize(Ty));
   3117       SDValue IdxN = getValue(Idx);
   3118 
   3119       // If the index is smaller or larger than intptr_t, truncate or extend
   3120       // it.
   3121       IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
   3122 
   3123       // If this is a multiply by a power of two, turn it into a shl
   3124       // immediately.  This is a very common case.
   3125       if (ElementSize != 1) {
   3126         if (ElementSize.isPowerOf2()) {
   3127           unsigned Amt = ElementSize.logBase2();
   3128           IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
   3129                              N.getValueType(), IdxN,
   3130                              DAG.getConstant(Amt, IdxN.getValueType()));
   3131         } else {
   3132           SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
   3133           IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
   3134                              N.getValueType(), IdxN, Scale);
   3135         }
   3136       }
   3137 
   3138       N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
   3139                       N.getValueType(), N, IdxN);
   3140     }
   3141   }
   3142 
   3143   setValue(&I, N);
   3144 }
   3145 
   3146 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
   3147   // If this is a fixed sized alloca in the entry block of the function,
   3148   // allocate it statically on the stack.
   3149   if (FuncInfo.StaticAllocaMap.count(&I))
   3150     return;   // getValue will auto-populate this.
   3151 
   3152   Type *Ty = I.getAllocatedType();
   3153   uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
   3154   unsigned Align =
   3155     std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
   3156              I.getAlignment());
   3157 
   3158   SDValue AllocSize = getValue(I.getArraySize());
   3159 
   3160   EVT IntPtr = TLI.getPointerTy();
   3161   if (AllocSize.getValueType() != IntPtr)
   3162     AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
   3163 
   3164   AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
   3165                           AllocSize,
   3166                           DAG.getConstant(TySize, IntPtr));
   3167 
   3168   // Handle alignment.  If the requested alignment is less than or equal to
   3169   // the stack alignment, ignore it.  If the size is greater than or equal to
   3170   // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
   3171   unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
   3172   if (Align <= StackAlign)
   3173     Align = 0;
   3174 
   3175   // Round the size of the allocation up to the stack alignment size
   3176   // by add SA-1 to the size.
   3177   AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
   3178                           AllocSize.getValueType(), AllocSize,
   3179                           DAG.getIntPtrConstant(StackAlign-1));
   3180 
   3181   // Mask out the low bits for alignment purposes.
   3182   AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
   3183                           AllocSize.getValueType(), AllocSize,
   3184                           DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
   3185 
   3186   SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
   3187   SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
   3188   SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
   3189                             VTs, Ops, 3);
   3190   setValue(&I, DSA);
   3191   DAG.setRoot(DSA.getValue(1));
   3192 
   3193   // Inform the Frame Information that we have just allocated a variable-sized
   3194   // object.
   3195   FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
   3196 }
   3197 
   3198 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
   3199   if (I.isAtomic())
   3200     return visitAtomicLoad(I);
   3201 
   3202   const Value *SV = I.getOperand(0);
   3203   SDValue Ptr = getValue(SV);
   3204 
   3205   Type *Ty = I.getType();
   3206 
   3207   bool isVolatile = I.isVolatile();
   3208   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   3209   bool isInvariant = I.getMetadata("invariant.load") != 0;
   3210   unsigned Alignment = I.getAlignment();
   3211   const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
   3212   const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
   3213 
   3214   SmallVector<EVT, 4> ValueVTs;
   3215   SmallVector<uint64_t, 4> Offsets;
   3216   ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
   3217   unsigned NumValues = ValueVTs.size();
   3218   if (NumValues == 0)
   3219     return;
   3220 
   3221   SDValue Root;
   3222   bool ConstantMemory = false;
   3223   if (I.isVolatile() || NumValues > MaxParallelChains)
   3224     // Serialize volatile loads with other side effects.
   3225     Root = getRoot();
   3226   else if (AA->pointsToConstantMemory(
   3227              AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
   3228     // Do not serialize (non-volatile) loads of constant memory with anything.
   3229     Root = DAG.getEntryNode();
   3230     ConstantMemory = true;
   3231   } else {
   3232     // Do not serialize non-volatile loads against each other.
   3233     Root = DAG.getRoot();
   3234   }
   3235 
   3236   SmallVector<SDValue, 4> Values(NumValues);
   3237   SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
   3238                                           NumValues));
   3239   EVT PtrVT = Ptr.getValueType();
   3240   unsigned ChainI = 0;
   3241   for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
   3242     // Serializing loads here may result in excessive register pressure, and
   3243     // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
   3244     // could recover a bit by hoisting nodes upward in the chain by recognizing
   3245     // they are side-effect free or do not alias. The optimizer should really
   3246     // avoid this case by converting large object/array copies to llvm.memcpy
   3247     // (MaxParallelChains should always remain as failsafe).
   3248     if (ChainI == MaxParallelChains) {
   3249       assert(PendingLoads.empty() && "PendingLoads must be serialized first");
   3250       SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   3251                                   MVT::Other, &Chains[0], ChainI);
   3252       Root = Chain;
   3253       ChainI = 0;
   3254     }
   3255     SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
   3256                             PtrVT, Ptr,
   3257                             DAG.getConstant(Offsets[i], PtrVT));
   3258     SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
   3259                             A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
   3260                             isNonTemporal, isInvariant, Alignment, TBAAInfo,
   3261                             Ranges);
   3262 
   3263     Values[i] = L;
   3264     Chains[ChainI] = L.getValue(1);
   3265   }
   3266 
   3267   if (!ConstantMemory) {
   3268     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   3269                                 MVT::Other, &Chains[0], ChainI);
   3270     if (isVolatile)
   3271       DAG.setRoot(Chain);
   3272     else
   3273       PendingLoads.push_back(Chain);
   3274   }
   3275 
   3276   setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   3277                            DAG.getVTList(&ValueVTs[0], NumValues),
   3278                            &Values[0], NumValues));
   3279 }
   3280 
   3281 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
   3282   if (I.isAtomic())
   3283     return visitAtomicStore(I);
   3284 
   3285   const Value *SrcV = I.getOperand(0);
   3286   const Value *PtrV = I.getOperand(1);
   3287 
   3288   SmallVector<EVT, 4> ValueVTs;
   3289   SmallVector<uint64_t, 4> Offsets;
   3290   ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
   3291   unsigned NumValues = ValueVTs.size();
   3292   if (NumValues == 0)
   3293     return;
   3294 
   3295   // Get the lowered operands. Note that we do this after
   3296   // checking if NumResults is zero, because with zero results
   3297   // the operands won't have values in the map.
   3298   SDValue Src = getValue(SrcV);
   3299   SDValue Ptr = getValue(PtrV);
   3300 
   3301   SDValue Root = getRoot();
   3302   SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
   3303                                           NumValues));
   3304   EVT PtrVT = Ptr.getValueType();
   3305   bool isVolatile = I.isVolatile();
   3306   bool isNonTemporal = I.getMetadata("nontemporal") != 0;
   3307   unsigned Alignment = I.getAlignment();
   3308   const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
   3309 
   3310   unsigned ChainI = 0;
   3311   for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
   3312     // See visitLoad comments.
   3313     if (ChainI == MaxParallelChains) {
   3314       SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   3315                                   MVT::Other, &Chains[0], ChainI);
   3316       Root = Chain;
   3317       ChainI = 0;
   3318     }
   3319     SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
   3320                               DAG.getConstant(Offsets[i], PtrVT));
   3321     SDValue St = DAG.getStore(Root, getCurDebugLoc(),
   3322                               SDValue(Src.getNode(), Src.getResNo() + i),
   3323                               Add, MachinePointerInfo(PtrV, Offsets[i]),
   3324                               isVolatile, isNonTemporal, Alignment, TBAAInfo);
   3325     Chains[ChainI] = St;
   3326   }
   3327 
   3328   SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   3329                                   MVT::Other, &Chains[0], ChainI);
   3330   ++SDNodeOrder;
   3331   AssignOrderingToNode(StoreNode.getNode());
   3332   DAG.setRoot(StoreNode);
   3333 }
   3334 
   3335 static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
   3336                                     SynchronizationScope Scope,
   3337                                     bool Before, DebugLoc dl,
   3338                                     SelectionDAG &DAG,
   3339                                     const TargetLowering &TLI) {
   3340   // Fence, if necessary
   3341   if (Before) {
   3342     if (Order == AcquireRelease || Order == SequentiallyConsistent)
   3343       Order = Release;
   3344     else if (Order == Acquire || Order == Monotonic)
   3345       return Chain;
   3346   } else {
   3347     if (Order == AcquireRelease)
   3348       Order = Acquire;
   3349     else if (Order == Release || Order == Monotonic)
   3350       return Chain;
   3351   }
   3352   SDValue Ops[3];
   3353   Ops[0] = Chain;
   3354   Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
   3355   Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
   3356   return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
   3357 }
   3358 
   3359 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
   3360   DebugLoc dl = getCurDebugLoc();
   3361   AtomicOrdering Order = I.getOrdering();
   3362   SynchronizationScope Scope = I.getSynchScope();
   3363 
   3364   SDValue InChain = getRoot();
   3365 
   3366   if (TLI.getInsertFencesForAtomic())
   3367     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
   3368                                    DAG, TLI);
   3369 
   3370   SDValue L =
   3371     DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
   3372                   getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
   3373                   InChain,
   3374                   getValue(I.getPointerOperand()),
   3375                   getValue(I.getCompareOperand()),
   3376                   getValue(I.getNewValOperand()),
   3377                   MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
   3378                   TLI.getInsertFencesForAtomic() ? Monotonic : Order,
   3379                   Scope);
   3380 
   3381   SDValue OutChain = L.getValue(1);
   3382 
   3383   if (TLI.getInsertFencesForAtomic())
   3384     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
   3385                                     DAG, TLI);
   3386 
   3387   setValue(&I, L);
   3388   DAG.setRoot(OutChain);
   3389 }
   3390 
   3391 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
   3392   DebugLoc dl = getCurDebugLoc();
   3393   ISD::NodeType NT;
   3394   switch (I.getOperation()) {
   3395   default: llvm_unreachable("Unknown atomicrmw operation");
   3396   case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
   3397   case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
   3398   case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
   3399   case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
   3400   case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
   3401   case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
   3402   case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
   3403   case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
   3404   case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
   3405   case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
   3406   case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
   3407   }
   3408   AtomicOrdering Order = I.getOrdering();
   3409   SynchronizationScope Scope = I.getSynchScope();
   3410 
   3411   SDValue InChain = getRoot();
   3412 
   3413   if (TLI.getInsertFencesForAtomic())
   3414     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
   3415                                    DAG, TLI);
   3416 
   3417   SDValue L =
   3418     DAG.getAtomic(NT, dl,
   3419                   getValue(I.getValOperand()).getValueType().getSimpleVT(),
   3420                   InChain,
   3421                   getValue(I.getPointerOperand()),
   3422                   getValue(I.getValOperand()),
   3423                   I.getPointerOperand(), 0 /* Alignment */,
   3424                   TLI.getInsertFencesForAtomic() ? Monotonic : Order,
   3425                   Scope);
   3426 
   3427   SDValue OutChain = L.getValue(1);
   3428 
   3429   if (TLI.getInsertFencesForAtomic())
   3430     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
   3431                                     DAG, TLI);
   3432 
   3433   setValue(&I, L);
   3434   DAG.setRoot(OutChain);
   3435 }
   3436 
   3437 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
   3438   DebugLoc dl = getCurDebugLoc();
   3439   SDValue Ops[3];
   3440   Ops[0] = getRoot();
   3441   Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
   3442   Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
   3443   DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
   3444 }
   3445 
   3446 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   3447   DebugLoc dl = getCurDebugLoc();
   3448   AtomicOrdering Order = I.getOrdering();
   3449   SynchronizationScope Scope = I.getSynchScope();
   3450 
   3451   SDValue InChain = getRoot();
   3452 
   3453   EVT VT = EVT::getEVT(I.getType());
   3454 
   3455   if (I.getAlignment() * 8 < VT.getSizeInBits())
   3456     report_fatal_error("Cannot generate unaligned atomic load");
   3457 
   3458   SDValue L =
   3459     DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
   3460                   getValue(I.getPointerOperand()),
   3461                   I.getPointerOperand(), I.getAlignment(),
   3462                   TLI.getInsertFencesForAtomic() ? Monotonic : Order,
   3463                   Scope);
   3464 
   3465   SDValue OutChain = L.getValue(1);
   3466 
   3467   if (TLI.getInsertFencesForAtomic())
   3468     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
   3469                                     DAG, TLI);
   3470 
   3471   setValue(&I, L);
   3472   DAG.setRoot(OutChain);
   3473 }
   3474 
   3475 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   3476   DebugLoc dl = getCurDebugLoc();
   3477 
   3478   AtomicOrdering Order = I.getOrdering();
   3479   SynchronizationScope Scope = I.getSynchScope();
   3480 
   3481   SDValue InChain = getRoot();
   3482 
   3483   EVT VT = EVT::getEVT(I.getValueOperand()->getType());
   3484 
   3485   if (I.getAlignment() * 8 < VT.getSizeInBits())
   3486     report_fatal_error("Cannot generate unaligned atomic store");
   3487 
   3488   if (TLI.getInsertFencesForAtomic())
   3489     InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
   3490                                    DAG, TLI);
   3491 
   3492   SDValue OutChain =
   3493     DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
   3494                   InChain,
   3495                   getValue(I.getPointerOperand()),
   3496                   getValue(I.getValueOperand()),
   3497                   I.getPointerOperand(), I.getAlignment(),
   3498                   TLI.getInsertFencesForAtomic() ? Monotonic : Order,
   3499                   Scope);
   3500 
   3501   if (TLI.getInsertFencesForAtomic())
   3502     OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
   3503                                     DAG, TLI);
   3504 
   3505   DAG.setRoot(OutChain);
   3506 }
   3507 
   3508 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
   3509 /// node.
   3510 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   3511                                                unsigned Intrinsic) {
   3512   bool HasChain = !I.doesNotAccessMemory();
   3513   bool OnlyLoad = HasChain && I.onlyReadsMemory();
   3514 
   3515   // Build the operand list.
   3516   SmallVector<SDValue, 8> Ops;
   3517   if (HasChain) {  // If this intrinsic has side-effects, chainify it.
   3518     if (OnlyLoad) {
   3519       // We don't need to serialize loads against other loads.
   3520       Ops.push_back(DAG.getRoot());
   3521     } else {
   3522       Ops.push_back(getRoot());
   3523     }
   3524   }
   3525 
   3526   // Info is set by getTgtMemInstrinsic
   3527   TargetLowering::IntrinsicInfo Info;
   3528   bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
   3529 
   3530   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
   3531   if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
   3532       Info.opc == ISD::INTRINSIC_W_CHAIN)
   3533     Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
   3534 
   3535   // Add all operands of the call to the operand list.
   3536   for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
   3537     SDValue Op = getValue(I.getArgOperand(i));
   3538     Ops.push_back(Op);
   3539   }
   3540 
   3541   SmallVector<EVT, 4> ValueVTs;
   3542   ComputeValueVTs(TLI, I.getType(), ValueVTs);
   3543 
   3544   if (HasChain)
   3545     ValueVTs.push_back(MVT::Other);
   3546 
   3547   SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
   3548 
   3549   // Create the node.
   3550   SDValue Result;
   3551   if (IsTgtIntrinsic) {
   3552     // This is target intrinsic that touches memory
   3553     Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
   3554                                      VTs, &Ops[0], Ops.size(),
   3555                                      Info.memVT,
   3556                                    MachinePointerInfo(Info.ptrVal, Info.offset),
   3557                                      Info.align, Info.vol,
   3558                                      Info.readMem, Info.writeMem);
   3559   } else if (!HasChain) {
   3560     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
   3561                          VTs, &Ops[0], Ops.size());
   3562   } else if (!I.getType()->isVoidTy()) {
   3563     Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
   3564                          VTs, &Ops[0], Ops.size());
   3565   } else {
   3566     Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
   3567                          VTs, &Ops[0], Ops.size());
   3568   }
   3569 
   3570   if (HasChain) {
   3571     SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
   3572     if (OnlyLoad)
   3573       PendingLoads.push_back(Chain);
   3574     else
   3575       DAG.setRoot(Chain);
   3576   }
   3577 
   3578   if (!I.getType()->isVoidTy()) {
   3579     if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
   3580       EVT VT = TLI.getValueType(PTy);
   3581       Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
   3582     }
   3583 
   3584     setValue(&I, Result);
   3585   } else {
   3586     // Assign order to result here. If the intrinsic does not produce a result,
   3587     // it won't be mapped to a SDNode and visit() will not assign it an order
   3588     // number.
   3589     ++SDNodeOrder;
   3590     AssignOrderingToNode(Result.getNode());
   3591   }
   3592 }
   3593 
   3594 /// GetSignificand - Get the significand and build it into a floating-point
   3595 /// number with exponent of 1:
   3596 ///
   3597 ///   Op = (Op & 0x007fffff) | 0x3f800000;
   3598 ///
   3599 /// where Op is the hexidecimal representation of floating point value.
   3600 static SDValue
   3601 GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
   3602   SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
   3603                            DAG.getConstant(0x007fffff, MVT::i32));
   3604   SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
   3605                            DAG.getConstant(0x3f800000, MVT::i32));
   3606   return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
   3607 }
   3608 
   3609 /// GetExponent - Get the exponent:
   3610 ///
   3611 ///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
   3612 ///
   3613 /// where Op is the hexidecimal representation of floating point value.
   3614 static SDValue
   3615 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
   3616             DebugLoc dl) {
   3617   SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
   3618                            DAG.getConstant(0x7f800000, MVT::i32));
   3619   SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
   3620                            DAG.getConstant(23, TLI.getPointerTy()));
   3621   SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
   3622                            DAG.getConstant(127, MVT::i32));
   3623   return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
   3624 }
   3625 
   3626 /// getF32Constant - Get 32-bit floating point constant.
   3627 static SDValue
   3628 getF32Constant(SelectionDAG &DAG, unsigned Flt) {
   3629   return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
   3630 }
   3631 
   3632 /// visitExp - Lower an exp intrinsic. Handles the special sequences for
   3633 /// limited-precision mode.
   3634 void
   3635 SelectionDAGBuilder::visitExp(const CallInst &I) {
   3636   SDValue result;
   3637   DebugLoc dl = getCurDebugLoc();
   3638 
   3639   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
   3640       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   3641     SDValue Op = getValue(I.getArgOperand(0));
   3642 
   3643     // Put the exponent in the right bit position for later addition to the
   3644     // final result:
   3645     //
   3646     //   #define LOG2OFe 1.4426950f
   3647     //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
   3648     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
   3649                              getF32Constant(DAG, 0x3fb8aa3b));
   3650     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
   3651 
   3652     //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
   3653     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
   3654     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
   3655 
   3656     //   IntegerPartOfX <<= 23;
   3657     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
   3658                                  DAG.getConstant(23, TLI.getPointerTy()));
   3659 
   3660     if (LimitFloatPrecision <= 6) {
   3661       // For floating-point precision of 6:
   3662       //
   3663       //   TwoToFractionalPartOfX =
   3664       //     0.997535578f +
   3665       //       (0.735607626f + 0.252464424f * x) * x;
   3666       //
   3667       // error 0.0144103317, which is 6 bits
   3668       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3669                                getF32Constant(DAG, 0x3e814304));
   3670       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   3671                                getF32Constant(DAG, 0x3f3c50c8));
   3672       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3673       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3674                                getF32Constant(DAG, 0x3f7f5e7e));
   3675       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
   3676 
   3677       // Add the exponent into the result in integer domain.
   3678       SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
   3679                                TwoToFracPartOfX, IntegerPartOfX);
   3680 
   3681       result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
   3682     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   3683       // For floating-point precision of 12:
   3684       //
   3685       //   TwoToFractionalPartOfX =
   3686       //     0.999892986f +
   3687       //       (0.696457318f +
   3688       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
   3689       //
   3690       // 0.000107046256 error, which is 13 to 14 bits
   3691       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3692                                getF32Constant(DAG, 0x3da235e3));
   3693       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   3694                                getF32Constant(DAG, 0x3e65b8f3));
   3695       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3696       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3697                                getF32Constant(DAG, 0x3f324b07));
   3698       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3699       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   3700                                getF32Constant(DAG, 0x3f7ff8fd));
   3701       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
   3702 
   3703       // Add the exponent into the result in integer domain.
   3704       SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
   3705                                TwoToFracPartOfX, IntegerPartOfX);
   3706 
   3707       result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
   3708     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   3709       // For floating-point precision of 18:
   3710       //
   3711       //   TwoToFractionalPartOfX =
   3712       //     0.999999982f +
   3713       //       (0.693148872f +
   3714       //         (0.240227044f +
   3715       //           (0.554906021e-1f +
   3716       //             (0.961591928e-2f +
   3717       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
   3718       //
   3719       // error 2.47208000*10^(-7), which is better than 18 bits
   3720       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3721                                getF32Constant(DAG, 0x3924b03e));
   3722       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   3723                                getF32Constant(DAG, 0x3ab24b87));
   3724       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3725       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3726                                getF32Constant(DAG, 0x3c1d8c17));
   3727       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3728       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   3729                                getF32Constant(DAG, 0x3d634a1d));
   3730       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   3731       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
   3732                                getF32Constant(DAG, 0x3e75fe14));
   3733       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
   3734       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
   3735                                 getF32Constant(DAG, 0x3f317234));
   3736       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
   3737       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
   3738                                 getF32Constant(DAG, 0x3f800000));
   3739       SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
   3740                                              MVT::i32, t13);
   3741 
   3742       // Add the exponent into the result in integer domain.
   3743       SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
   3744                                 TwoToFracPartOfX, IntegerPartOfX);
   3745 
   3746       result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
   3747     }
   3748   } else {
   3749     // No special expansion.
   3750     result = DAG.getNode(ISD::FEXP, dl,
   3751                          getValue(I.getArgOperand(0)).getValueType(),
   3752                          getValue(I.getArgOperand(0)));
   3753   }
   3754 
   3755   setValue(&I, result);
   3756 }
   3757 
   3758 /// visitLog - Lower a log intrinsic. Handles the special sequences for
   3759 /// limited-precision mode.
   3760 void
   3761 SelectionDAGBuilder::visitLog(const CallInst &I) {
   3762   SDValue result;
   3763   DebugLoc dl = getCurDebugLoc();
   3764 
   3765   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
   3766       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   3767     SDValue Op = getValue(I.getArgOperand(0));
   3768     SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3769 
   3770     // Scale the exponent by log(2) [0.69314718f].
   3771     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
   3772     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
   3773                                         getF32Constant(DAG, 0x3f317218));
   3774 
   3775     // Get the significand and build it into a floating-point number with
   3776     // exponent of 1.
   3777     SDValue X = GetSignificand(DAG, Op1, dl);
   3778 
   3779     if (LimitFloatPrecision <= 6) {
   3780       // For floating-point precision of 6:
   3781       //
   3782       //   LogofMantissa =
   3783       //     -1.1609546f +
   3784       //       (1.4034025f - 0.23903021f * x) * x;
   3785       //
   3786       // error 0.0034276066, which is better than 8 bits
   3787       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3788                                getF32Constant(DAG, 0xbe74c456));
   3789       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3790                                getF32Constant(DAG, 0x3fb3a2b1));
   3791       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3792       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3793                                           getF32Constant(DAG, 0x3f949a29));
   3794 
   3795       result = DAG.getNode(ISD::FADD, dl,
   3796                            MVT::f32, LogOfExponent, LogOfMantissa);
   3797     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   3798       // For floating-point precision of 12:
   3799       //
   3800       //   LogOfMantissa =
   3801       //     -1.7417939f +
   3802       //       (2.8212026f +
   3803       //         (-1.4699568f +
   3804       //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
   3805       //
   3806       // error 0.000061011436, which is 14 bits
   3807       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3808                                getF32Constant(DAG, 0xbd67b6d6));
   3809       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3810                                getF32Constant(DAG, 0x3ee4f4b8));
   3811       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3812       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3813                                getF32Constant(DAG, 0x3fbc278b));
   3814       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3815       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3816                                getF32Constant(DAG, 0x40348e95));
   3817       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3818       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
   3819                                           getF32Constant(DAG, 0x3fdef31a));
   3820 
   3821       result = DAG.getNode(ISD::FADD, dl,
   3822                            MVT::f32, LogOfExponent, LogOfMantissa);
   3823     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   3824       // For floating-point precision of 18:
   3825       //
   3826       //   LogOfMantissa =
   3827       //     -2.1072184f +
   3828       //       (4.2372794f +
   3829       //         (-3.7029485f +
   3830       //           (2.2781945f +
   3831       //             (-0.87823314f +
   3832       //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
   3833       //
   3834       // error 0.0000023660568, which is better than 18 bits
   3835       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3836                                getF32Constant(DAG, 0xbc91e5ac));
   3837       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3838                                getF32Constant(DAG, 0x3e4350aa));
   3839       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3840       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3841                                getF32Constant(DAG, 0x3f60d3e3));
   3842       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3843       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3844                                getF32Constant(DAG, 0x4011cdf0));
   3845       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3846       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
   3847                                getF32Constant(DAG, 0x406cfd1c));
   3848       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   3849       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
   3850                                getF32Constant(DAG, 0x408797cb));
   3851       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
   3852       SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
   3853                                           getF32Constant(DAG, 0x4006dcab));
   3854 
   3855       result = DAG.getNode(ISD::FADD, dl,
   3856                            MVT::f32, LogOfExponent, LogOfMantissa);
   3857     }
   3858   } else {
   3859     // No special expansion.
   3860     result = DAG.getNode(ISD::FLOG, dl,
   3861                          getValue(I.getArgOperand(0)).getValueType(),
   3862                          getValue(I.getArgOperand(0)));
   3863   }
   3864 
   3865   setValue(&I, result);
   3866 }
   3867 
   3868 /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
   3869 /// limited-precision mode.
   3870 void
   3871 SelectionDAGBuilder::visitLog2(const CallInst &I) {
   3872   SDValue result;
   3873   DebugLoc dl = getCurDebugLoc();
   3874 
   3875   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
   3876       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   3877     SDValue Op = getValue(I.getArgOperand(0));
   3878     SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3879 
   3880     // Get the exponent.
   3881     SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
   3882 
   3883     // Get the significand and build it into a floating-point number with
   3884     // exponent of 1.
   3885     SDValue X = GetSignificand(DAG, Op1, dl);
   3886 
   3887     // Different possible minimax approximations of significand in
   3888     // floating-point for various degrees of accuracy over [1,2].
   3889     if (LimitFloatPrecision <= 6) {
   3890       // For floating-point precision of 6:
   3891       //
   3892       //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
   3893       //
   3894       // error 0.0049451742, which is more than 7 bits
   3895       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3896                                getF32Constant(DAG, 0xbeb08fe0));
   3897       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3898                                getF32Constant(DAG, 0x40019463));
   3899       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3900       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3901                                            getF32Constant(DAG, 0x3fd6633d));
   3902 
   3903       result = DAG.getNode(ISD::FADD, dl,
   3904                            MVT::f32, LogOfExponent, Log2ofMantissa);
   3905     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   3906       // For floating-point precision of 12:
   3907       //
   3908       //   Log2ofMantissa =
   3909       //     -2.51285454f +
   3910       //       (4.07009056f +
   3911       //         (-2.12067489f +
   3912       //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
   3913       //
   3914       // error 0.0000876136000, which is better than 13 bits
   3915       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3916                                getF32Constant(DAG, 0xbda7262e));
   3917       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3918                                getF32Constant(DAG, 0x3f25280b));
   3919       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3920       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3921                                getF32Constant(DAG, 0x4007b923));
   3922       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3923       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3924                                getF32Constant(DAG, 0x40823e2f));
   3925       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3926       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
   3927                                            getF32Constant(DAG, 0x4020d29c));
   3928 
   3929       result = DAG.getNode(ISD::FADD, dl,
   3930                            MVT::f32, LogOfExponent, Log2ofMantissa);
   3931     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   3932       // For floating-point precision of 18:
   3933       //
   3934       //   Log2ofMantissa =
   3935       //     -3.0400495f +
   3936       //       (6.1129976f +
   3937       //         (-5.3420409f +
   3938       //           (3.2865683f +
   3939       //             (-1.2669343f +
   3940       //               (0.27515199f -
   3941       //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
   3942       //
   3943       // error 0.0000018516, which is better than 18 bits
   3944       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   3945                                getF32Constant(DAG, 0xbcd2769e));
   3946       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   3947                                getF32Constant(DAG, 0x3e8ce0b9));
   3948       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   3949       SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   3950                                getF32Constant(DAG, 0x3fa22ae7));
   3951       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   3952       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   3953                                getF32Constant(DAG, 0x40525723));
   3954       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   3955       SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
   3956                                getF32Constant(DAG, 0x40aaf200));
   3957       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   3958       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
   3959                                getF32Constant(DAG, 0x40c39dad));
   3960       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
   3961       SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
   3962                                            getF32Constant(DAG, 0x4042902c));
   3963 
   3964       result = DAG.getNode(ISD::FADD, dl,
   3965                            MVT::f32, LogOfExponent, Log2ofMantissa);
   3966     }
   3967   } else {
   3968     // No special expansion.
   3969     result = DAG.getNode(ISD::FLOG2, dl,
   3970                          getValue(I.getArgOperand(0)).getValueType(),
   3971                          getValue(I.getArgOperand(0)));
   3972   }
   3973 
   3974   setValue(&I, result);
   3975 }
   3976 
   3977 /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
   3978 /// limited-precision mode.
   3979 void
   3980 SelectionDAGBuilder::visitLog10(const CallInst &I) {
   3981   SDValue result;
   3982   DebugLoc dl = getCurDebugLoc();
   3983 
   3984   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
   3985       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   3986     SDValue Op = getValue(I.getArgOperand(0));
   3987     SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3988 
   3989     // Scale the exponent by log10(2) [0.30102999f].
   3990     SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
   3991     SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
   3992                                         getF32Constant(DAG, 0x3e9a209a));
   3993 
   3994     // Get the significand and build it into a floating-point number with
   3995     // exponent of 1.
   3996     SDValue X = GetSignificand(DAG, Op1, dl);
   3997 
   3998     if (LimitFloatPrecision <= 6) {
   3999       // For floating-point precision of 6:
   4000       //
   4001       //   Log10ofMantissa =
   4002       //     -0.50419619f +
   4003       //       (0.60948995f - 0.10380950f * x) * x;
   4004       //
   4005       // error 0.0014886165, which is 6 bits
   4006       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4007                                getF32Constant(DAG, 0xbdd49a13));
   4008       SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
   4009                                getF32Constant(DAG, 0x3f1c0789));
   4010       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   4011       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
   4012                                             getF32Constant(DAG, 0x3f011300));
   4013 
   4014       result = DAG.getNode(ISD::FADD, dl,
   4015                            MVT::f32, LogOfExponent, Log10ofMantissa);
   4016     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   4017       // For floating-point precision of 12:
   4018       //
   4019       //   Log10ofMantissa =
   4020       //     -0.64831180f +
   4021       //       (0.91751397f +
   4022       //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
   4023       //
   4024       // error 0.00019228036, which is better than 12 bits
   4025       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4026                                getF32Constant(DAG, 0x3d431f31));
   4027       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
   4028                                getF32Constant(DAG, 0x3ea21fb2));
   4029       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   4030       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4031                                getF32Constant(DAG, 0x3f6ae232));
   4032       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4033       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
   4034                                             getF32Constant(DAG, 0x3f25f7c3));
   4035 
   4036       result = DAG.getNode(ISD::FADD, dl,
   4037                            MVT::f32, LogOfExponent, Log10ofMantissa);
   4038     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   4039       // For floating-point precision of 18:
   4040       //
   4041       //   Log10ofMantissa =
   4042       //     -0.84299375f +
   4043       //       (1.5327582f +
   4044       //         (-1.0688956f +
   4045       //           (0.49102474f +
   4046       //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
   4047       //
   4048       // error 0.0000037995730, which is better than 18 bits
   4049       SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4050                                getF32Constant(DAG, 0x3c5d51ce));
   4051       SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
   4052                                getF32Constant(DAG, 0x3e00685a));
   4053       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
   4054       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4055                                getF32Constant(DAG, 0x3efb6798));
   4056       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4057       SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
   4058                                getF32Constant(DAG, 0x3f88d192));
   4059       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   4060       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   4061                                getF32Constant(DAG, 0x3fc4316c));
   4062       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   4063       SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
   4064                                             getF32Constant(DAG, 0x3f57ce70));
   4065 
   4066       result = DAG.getNode(ISD::FADD, dl,
   4067                            MVT::f32, LogOfExponent, Log10ofMantissa);
   4068     }
   4069   } else {
   4070     // No special expansion.
   4071     result = DAG.getNode(ISD::FLOG10, dl,
   4072                          getValue(I.getArgOperand(0)).getValueType(),
   4073                          getValue(I.getArgOperand(0)));
   4074   }
   4075 
   4076   setValue(&I, result);
   4077 }
   4078 
   4079 /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
   4080 /// limited-precision mode.
   4081 void
   4082 SelectionDAGBuilder::visitExp2(const CallInst &I) {
   4083   SDValue result;
   4084   DebugLoc dl = getCurDebugLoc();
   4085 
   4086   if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
   4087       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   4088     SDValue Op = getValue(I.getArgOperand(0));
   4089 
   4090     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
   4091 
   4092     //   FractionalPartOfX = x - (float)IntegerPartOfX;
   4093     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
   4094     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
   4095 
   4096     //   IntegerPartOfX <<= 23;
   4097     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
   4098                                  DAG.getConstant(23, TLI.getPointerTy()));
   4099 
   4100     if (LimitFloatPrecision <= 6) {
   4101       // For floating-point precision of 6:
   4102       //
   4103       //   TwoToFractionalPartOfX =
   4104       //     0.997535578f +
   4105       //       (0.735607626f + 0.252464424f * x) * x;
   4106       //
   4107       // error 0.0144103317, which is 6 bits
   4108       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4109                                getF32Constant(DAG, 0x3e814304));
   4110       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4111                                getF32Constant(DAG, 0x3f3c50c8));
   4112       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4113       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4114                                getF32Constant(DAG, 0x3f7f5e7e));
   4115       SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
   4116       SDValue TwoToFractionalPartOfX =
   4117         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
   4118 
   4119       result = DAG.getNode(ISD::BITCAST, dl,
   4120                            MVT::f32, TwoToFractionalPartOfX);
   4121     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   4122       // For floating-point precision of 12:
   4123       //
   4124       //   TwoToFractionalPartOfX =
   4125       //     0.999892986f +
   4126       //       (0.696457318f +
   4127       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
   4128       //
   4129       // error 0.000107046256, which is 13 to 14 bits
   4130       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4131                                getF32Constant(DAG, 0x3da235e3));
   4132       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4133                                getF32Constant(DAG, 0x3e65b8f3));
   4134       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4135       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4136                                getF32Constant(DAG, 0x3f324b07));
   4137       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   4138       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   4139                                getF32Constant(DAG, 0x3f7ff8fd));
   4140       SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
   4141       SDValue TwoToFractionalPartOfX =
   4142         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
   4143 
   4144       result = DAG.getNode(ISD::BITCAST, dl,
   4145                            MVT::f32, TwoToFractionalPartOfX);
   4146     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   4147       // For floating-point precision of 18:
   4148       //
   4149       //   TwoToFractionalPartOfX =
   4150       //     0.999999982f +
   4151       //       (0.693148872f +
   4152       //         (0.240227044f +
   4153       //           (0.554906021e-1f +
   4154       //             (0.961591928e-2f +
   4155       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
   4156       // error 2.47208000*10^(-7), which is better than 18 bits
   4157       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4158                                getF32Constant(DAG, 0x3924b03e));
   4159       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4160                                getF32Constant(DAG, 0x3ab24b87));
   4161       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4162       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4163                                getF32Constant(DAG, 0x3c1d8c17));
   4164       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   4165       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   4166                                getF32Constant(DAG, 0x3d634a1d));
   4167       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   4168       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
   4169                                getF32Constant(DAG, 0x3e75fe14));
   4170       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
   4171       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
   4172                                 getF32Constant(DAG, 0x3f317234));
   4173       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
   4174       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
   4175                                 getF32Constant(DAG, 0x3f800000));
   4176       SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
   4177       SDValue TwoToFractionalPartOfX =
   4178         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
   4179 
   4180       result = DAG.getNode(ISD::BITCAST, dl,
   4181                            MVT::f32, TwoToFractionalPartOfX);
   4182     }
   4183   } else {
   4184     // No special expansion.
   4185     result = DAG.getNode(ISD::FEXP2, dl,
   4186                          getValue(I.getArgOperand(0)).getValueType(),
   4187                          getValue(I.getArgOperand(0)));
   4188   }
   4189 
   4190   setValue(&I, result);
   4191 }
   4192 
   4193 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
   4194 /// limited-precision mode with x == 10.0f.
   4195 void
   4196 SelectionDAGBuilder::visitPow(const CallInst &I) {
   4197   SDValue result;
   4198   const Value *Val = I.getArgOperand(0);
   4199   DebugLoc dl = getCurDebugLoc();
   4200   bool IsExp10 = false;
   4201 
   4202   if (getValue(Val).getValueType() == MVT::f32 &&
   4203       getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
   4204       LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   4205     if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
   4206       if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
   4207         APFloat Ten(10.0f);
   4208         IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
   4209       }
   4210     }
   4211   }
   4212 
   4213   if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
   4214     SDValue Op = getValue(I.getArgOperand(1));
   4215 
   4216     // Put the exponent in the right bit position for later addition to the
   4217     // final result:
   4218     //
   4219     //   #define LOG2OF10 3.3219281f
   4220     //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
   4221     SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
   4222                              getF32Constant(DAG, 0x40549a78));
   4223     SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
   4224 
   4225     //   FractionalPartOfX = x - (float)IntegerPartOfX;
   4226     SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
   4227     SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
   4228 
   4229     //   IntegerPartOfX <<= 23;
   4230     IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
   4231                                  DAG.getConstant(23, TLI.getPointerTy()));
   4232 
   4233     if (LimitFloatPrecision <= 6) {
   4234       // For floating-point precision of 6:
   4235       //
   4236       //   twoToFractionalPartOfX =
   4237       //     0.997535578f +
   4238       //       (0.735607626f + 0.252464424f * x) * x;
   4239       //
   4240       // error 0.0144103317, which is 6 bits
   4241       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4242                                getF32Constant(DAG, 0x3e814304));
   4243       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4244                                getF32Constant(DAG, 0x3f3c50c8));
   4245       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4246       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4247                                getF32Constant(DAG, 0x3f7f5e7e));
   4248       SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
   4249       SDValue TwoToFractionalPartOfX =
   4250         DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
   4251 
   4252       result = DAG.getNode(ISD::BITCAST, dl,
   4253                            MVT::f32, TwoToFractionalPartOfX);
   4254     } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
   4255       // For floating-point precision of 12:
   4256       //
   4257       //   TwoToFractionalPartOfX =
   4258       //     0.999892986f +
   4259       //       (0.696457318f +
   4260       //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
   4261       //
   4262       // error 0.000107046256, which is 13 to 14 bits
   4263       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4264                                getF32Constant(DAG, 0x3da235e3));
   4265       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4266                                getF32Constant(DAG, 0x3e65b8f3));
   4267       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4268       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4269                                getF32Constant(DAG, 0x3f324b07));
   4270       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   4271       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   4272                                getF32Constant(DAG, 0x3f7ff8fd));
   4273       SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
   4274       SDValue TwoToFractionalPartOfX =
   4275         DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
   4276 
   4277       result = DAG.getNode(ISD::BITCAST, dl,
   4278                            MVT::f32, TwoToFractionalPartOfX);
   4279     } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
   4280       // For floating-point precision of 18:
   4281       //
   4282       //   TwoToFractionalPartOfX =
   4283       //     0.999999982f +
   4284       //       (0.693148872f +
   4285       //         (0.240227044f +
   4286       //           (0.554906021e-1f +
   4287       //             (0.961591928e-2f +
   4288       //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
   4289       // error 2.47208000*10^(-7), which is better than 18 bits
   4290       SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
   4291                                getF32Constant(DAG, 0x3924b03e));
   4292       SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
   4293                                getF32Constant(DAG, 0x3ab24b87));
   4294       SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
   4295       SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
   4296                                getF32Constant(DAG, 0x3c1d8c17));
   4297       SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
   4298       SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
   4299                                getF32Constant(DAG, 0x3d634a1d));
   4300       SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
   4301       SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
   4302                                getF32Constant(DAG, 0x3e75fe14));
   4303       SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
   4304       SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
   4305                                 getF32Constant(DAG, 0x3f317234));
   4306       SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
   4307       SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
   4308                                 getF32Constant(DAG, 0x3f800000));
   4309       SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
   4310       SDValue TwoToFractionalPartOfX =
   4311         DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
   4312 
   4313       result = DAG.getNode(ISD::BITCAST, dl,
   4314                            MVT::f32, TwoToFractionalPartOfX);
   4315     }
   4316   } else {
   4317     // No special expansion.
   4318     result = DAG.getNode(ISD::FPOW, dl,
   4319                          getValue(I.getArgOperand(0)).getValueType(),
   4320                          getValue(I.getArgOperand(0)),
   4321                          getValue(I.getArgOperand(1)));
   4322   }
   4323 
   4324   setValue(&I, result);
   4325 }
   4326 
   4327 
   4328 /// ExpandPowI - Expand a llvm.powi intrinsic.
   4329 static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
   4330                           SelectionDAG &DAG) {
   4331   // If RHS is a constant, we can expand this out to a multiplication tree,
   4332   // otherwise we end up lowering to a call to __powidf2 (for example).  When
   4333   // optimizing for size, we only want to do this if the expansion would produce
   4334   // a small number of multiplies, otherwise we do the full expansion.
   4335   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
   4336     // Get the exponent as a positive value.
   4337     unsigned Val = RHSC->getSExtValue();
   4338     if ((int)Val < 0) Val = -Val;
   4339 
   4340     // powi(x, 0) -> 1.0
   4341     if (Val == 0)
   4342       return DAG.getConstantFP(1.0, LHS.getValueType());
   4343 
   4344     const Function *F = DAG.getMachineFunction().getFunction();
   4345     if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
   4346         // If optimizing for size, don't insert too many multiplies.  This
   4347         // inserts up to 5 multiplies.
   4348         CountPopulation_32(Val)+Log2_32(Val) < 7) {
   4349       // We use the simple binary decomposition method to generate the multiply
   4350       // sequence.  There are more optimal ways to do this (for example,
   4351       // powi(x,15) generates one more multiply than it should), but this has
   4352       // the benefit of being both really simple and much better than a libcall.
   4353       SDValue Res;  // Logically starts equal to 1.0
   4354       SDValue CurSquare = LHS;
   4355       while (Val) {
   4356         if (Val & 1) {
   4357           if (Res.getNode())
   4358             Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
   4359           else
   4360             Res = CurSquare;  // 1.0*CurSquare.
   4361         }
   4362 
   4363         CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
   4364                                 CurSquare, CurSquare);
   4365         Val >>= 1;
   4366       }
   4367 
   4368       // If the original was negative, invert the result, producing 1/(x*x*x).
   4369       if (RHSC->getSExtValue() < 0)
   4370         Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
   4371                           DAG.getConstantFP(1.0, LHS.getValueType()), Res);
   4372       return Res;
   4373     }
   4374   }
   4375 
   4376   // Otherwise, expand to a libcall.
   4377   return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
   4378 }
   4379 
   4380 // getTruncatedArgReg - Find underlying register used for an truncated
   4381 // argument.
   4382 static unsigned getTruncatedArgReg(const SDValue &N) {
   4383   if (N.getOpcode() != ISD::TRUNCATE)
   4384     return 0;
   4385 
   4386   const SDValue &Ext = N.getOperand(0);
   4387   if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
   4388     const SDValue &CFR = Ext.getOperand(0);
   4389     if (CFR.getOpcode() == ISD::CopyFromReg)
   4390       return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
   4391     if (CFR.getOpcode() == ISD::TRUNCATE)
   4392       return getTruncatedArgReg(CFR);
   4393   }
   4394   return 0;
   4395 }
   4396 
   4397 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
   4398 /// argument, create the corresponding DBG_VALUE machine instruction for it now.
   4399 /// At the end of instruction selection, they will be inserted to the entry BB.
   4400 bool
   4401 SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
   4402                                               int64_t Offset,
   4403                                               const SDValue &N) {
   4404   const Argument *Arg = dyn_cast<Argument>(V);
   4405   if (!Arg)
   4406     return false;
   4407 
   4408   MachineFunction &MF = DAG.getMachineFunction();
   4409   const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
   4410   const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
   4411 
   4412   // Ignore inlined function arguments here.
   4413   DIVariable DV(Variable);
   4414   if (DV.isInlinedFnArgument(MF.getFunction()))
   4415     return false;
   4416 
   4417   unsigned Reg = 0;
   4418   // Some arguments' frame index is recorded during argument lowering.
   4419   Offset = FuncInfo.getArgumentFrameIndex(Arg);
   4420   if (Offset)
   4421     Reg = TRI->getFrameRegister(MF);
   4422 
   4423   if (!Reg && N.getNode()) {
   4424     if (N.getOpcode() == ISD::CopyFromReg)
   4425       Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
   4426     else
   4427       Reg = getTruncatedArgReg(N);
   4428     if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
   4429       MachineRegisterInfo &RegInfo = MF.getRegInfo();
   4430       unsigned PR = RegInfo.getLiveInPhysReg(Reg);
   4431       if (PR)
   4432         Reg = PR;
   4433     }
   4434   }
   4435 
   4436   if (!Reg) {
   4437     // Check if ValueMap has reg number.
   4438     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
   4439     if (VMI != FuncInfo.ValueMap.end())
   4440       Reg = VMI->second;
   4441   }
   4442 
   4443   if (!Reg && N.getNode()) {
   4444     // Check if frame index is available.
   4445     if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
   4446       if (FrameIndexSDNode *FINode =
   4447           dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
   4448         Reg = TRI->getFrameRegister(MF);
   4449         Offset = FINode->getIndex();
   4450       }
   4451   }
   4452 
   4453   if (!Reg)
   4454     return false;
   4455 
   4456   MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
   4457                                     TII->get(TargetOpcode::DBG_VALUE))
   4458     .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
   4459   FuncInfo.ArgDbgValues.push_back(&*MIB);
   4460   return true;
   4461 }
   4462 
   4463 // VisualStudio defines setjmp as _setjmp
        // The code below refers to Intrinsic::setjmp, so a setjmp macro must not
        // be active here.  When building with MSVC and setjmp is a macro, save the
        // macro definition with push_macro and remove it.  The
        // setjmp_undefined_for_msvc marker records that this was done and keeps
        // the sequence from being applied a second time.
        // NOTE(review): presumably a matching pop_macro("setjmp") appears later
        // in the file -- confirm.
   4464 #if defined(_MSC_VER) && defined(setjmp) && \
   4465                          !defined(setjmp_undefined_for_msvc)
   4466 #  pragma push_macro("setjmp")
   4467 #  undef setjmp
   4468 #  define setjmp_undefined_for_msvc
   4469 #endif
   4470 
   4471 /// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
   4472 /// we want to emit this as a call to a named external function, return the name
   4473 /// otherwise lower it and return null.
   4474 const char *
   4475 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   4476   DebugLoc dl = getCurDebugLoc();
   4477   SDValue Res;
   4478 
   4479   switch (Intrinsic) {
   4480   default:
   4481     // By default, turn this into a target intrinsic node.
   4482     visitTargetIntrinsic(I, Intrinsic);
   4483     return 0;
   4484   case Intrinsic::vastart:  visitVAStart(I); return 0;
   4485   case Intrinsic::vaend:    visitVAEnd(I); return 0;
   4486   case Intrinsic::vacopy:   visitVACopy(I); return 0;
   4487   case Intrinsic::returnaddress:
   4488     setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
   4489                              getValue(I.getArgOperand(0))));
   4490     return 0;
   4491   case Intrinsic::frameaddress:
   4492     setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
   4493                              getValue(I.getArgOperand(0))));
   4494     return 0;
   4495   case Intrinsic::setjmp:
   4496     return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
   4497   case Intrinsic::longjmp:
   4498     return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
   4499   case Intrinsic::memcpy: {
   4500     // Assert for address < 256 since we support only user defined address
   4501     // spaces.
   4502     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
   4503            < 256 &&
   4504            cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
   4505            < 256 &&
   4506            "Unknown address space");
   4507     SDValue Op1 = getValue(I.getArgOperand(0));
   4508     SDValue Op2 = getValue(I.getArgOperand(1));
   4509     SDValue Op3 = getValue(I.getArgOperand(2));
   4510     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
   4511     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
   4512     DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
   4513                               MachinePointerInfo(I.getArgOperand(0)),
   4514                               MachinePointerInfo(I.getArgOperand(1))));
   4515     return 0;
   4516   }
   4517   case Intrinsic::memset: {
   4518     // Assert for address < 256 since we support only user defined address
   4519     // spaces.
   4520     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
   4521            < 256 &&
   4522            "Unknown address space");
   4523     SDValue Op1 = getValue(I.getArgOperand(0));
   4524     SDValue Op2 = getValue(I.getArgOperand(1));
   4525     SDValue Op3 = getValue(I.getArgOperand(2));
   4526     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
   4527     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
   4528     DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
   4529                               MachinePointerInfo(I.getArgOperand(0))));
   4530     return 0;
   4531   }
   4532   case Intrinsic::memmove: {
   4533     // Assert for address < 256 since we support only user defined address
   4534     // spaces.
   4535     assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
   4536            < 256 &&
   4537            cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
   4538            < 256 &&
   4539            "Unknown address space");
   4540     SDValue Op1 = getValue(I.getArgOperand(0));
   4541     SDValue Op2 = getValue(I.getArgOperand(1));
   4542     SDValue Op3 = getValue(I.getArgOperand(2));
   4543     unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
   4544     bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
   4545     DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
   4546                                MachinePointerInfo(I.getArgOperand(0)),
   4547                                MachinePointerInfo(I.getArgOperand(1))));
   4548     return 0;
   4549   }
   4550   case Intrinsic::dbg_declare: {
   4551     const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
   4552     MDNode *Variable = DI.getVariable();
   4553     const Value *Address = DI.getAddress();
   4554     if (!Address || !DIVariable(Variable).Verify()) {
   4555       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
   4556       return 0;
   4557     }
   4558 
   4559     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
   4560     // but do not always have a corresponding SDNode built.  The SDNodeOrder
   4561     // absolute, but not relative, values are different depending on whether
   4562     // debug info exists.
   4563     ++SDNodeOrder;
   4564 
   4565     // Check if address has undef value.
   4566     if (isa<UndefValue>(Address) ||
   4567         (Address->use_empty() && !isa<Argument>(Address))) {
   4568       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
   4569       return 0;
   4570     }
   4571 
   4572     SDValue &N = NodeMap[Address];
   4573     if (!N.getNode() && isa<Argument>(Address))
   4574       // Check unused arguments map.
   4575       N = UnusedArgNodeMap[Address];
   4576     SDDbgValue *SDV;
   4577     if (N.getNode()) {
   4578       if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
   4579         Address = BCI->getOperand(0);
   4580       // Parameters are handled specially.
   4581       bool isParameter =
   4582         (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
   4583          isa<Argument>(Address));
   4584 
   4585       const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
   4586 
   4587       if (isParameter && !AI) {
   4588         FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
   4589         if (FINode)
   4590           // Byval parameter.  We have a frame index at this point.
   4591           SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
   4592                                 0, dl, SDNodeOrder);
   4593         else {
   4594           // Address is an argument, so try to emit its dbg value using
   4595           // virtual register info from the FuncInfo.ValueMap.
   4596           EmitFuncArgumentDbgValue(Address, Variable, 0, N);
   4597           return 0;
   4598         }
   4599       } else if (AI)
   4600         SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
   4601                               0, dl, SDNodeOrder);
   4602       else {
   4603         // Can't do anything with other non-AI cases yet.
   4604         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
   4605         DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
   4606         DEBUG(Address->dump());
   4607         return 0;
   4608       }
   4609       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
   4610     } else {
   4611       // If Address is an argument then try to emit its dbg value using
   4612       // virtual register info from the FuncInfo.ValueMap.
   4613       if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
   4614         // If variable is pinned by a alloca in dominating bb then
   4615         // use StaticAllocaMap.
   4616         if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
   4617           if (AI->getParent() != DI.getParent()) {
   4618             DenseMap<const AllocaInst*, int>::iterator SI =
   4619               FuncInfo.StaticAllocaMap.find(AI);
   4620             if (SI != FuncInfo.StaticAllocaMap.end()) {
   4621               SDV = DAG.getDbgValue(Variable, SI->second,
   4622                                     0, dl, SDNodeOrder);
   4623               DAG.AddDbgValue(SDV, 0, false);
   4624               return 0;
   4625             }
   4626           }
   4627         }
   4628         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
   4629       }
   4630     }
   4631     return 0;
   4632   }
   4633   case Intrinsic::dbg_value: {
   4634     const DbgValueInst &DI = cast<DbgValueInst>(I);
   4635     if (!DIVariable(DI.getVariable()).Verify())
   4636       return 0;
   4637 
   4638     MDNode *Variable = DI.getVariable();
   4639     uint64_t Offset = DI.getOffset();
   4640     const Value *V = DI.getValue();
   4641     if (!V)
   4642       return 0;
   4643 
   4644     // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
   4645     // but do not always have a corresponding SDNode built.  The SDNodeOrder
   4646     // absolute, but not relative, values are different depending on whether
   4647     // debug info exists.
   4648     ++SDNodeOrder;
   4649     SDDbgValue *SDV;
   4650     if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
   4651       SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
   4652       DAG.AddDbgValue(SDV, 0, false);
   4653     } else {
   4654       // Do not use getValue() in here; we don't want to generate code at
   4655       // this point if it hasn't been done yet.
   4656       SDValue N = NodeMap[V];
   4657       if (!N.getNode() && isa<Argument>(V))
   4658         // Check unused arguments map.
   4659         N = UnusedArgNodeMap[V];
   4660       if (N.getNode()) {
   4661         if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
   4662           SDV = DAG.getDbgValue(Variable, N.getNode(),
   4663                                 N.getResNo(), Offset, dl, SDNodeOrder);
   4664           DAG.AddDbgValue(SDV, N.getNode(), false);
   4665         }
   4666       } else if (!V->use_empty() ) {
   4667         // Do not call getValue(V) yet, as we don't want to generate code.
   4668         // Remember it for later.
   4669         DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
   4670         DanglingDebugInfoMap[V] = DDI;
   4671       } else {
   4672         // We may expand this to cover more cases.  One case where we have no
   4673         // data available is an unreferenced parameter.
   4674         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
   4675       }
   4676     }
   4677 
   4678     // Build a debug info table entry.
   4679     if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
   4680       V = BCI->getOperand(0);
   4681     const AllocaInst *AI = dyn_cast<AllocaInst>(V);
   4682     // Don't handle byval struct arguments or VLAs, for example.
   4683     if (!AI) {
   4684       DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
   4685       DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
   4686       return 0;
   4687     }
   4688     DenseMap<const AllocaInst*, int>::iterator SI =
   4689       FuncInfo.StaticAllocaMap.find(AI);
   4690     if (SI == FuncInfo.StaticAllocaMap.end())
   4691       return 0; // VLAs.
   4692     int FI = SI->second;
   4693 
   4694     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
   4695     if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
   4696       MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
   4697     return 0;
   4698   }
   4699 
   4700   case Intrinsic::eh_typeid_for: {
   4701     // Find the type id for the given typeinfo.
   4702     GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
   4703     unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
   4704     Res = DAG.getConstant(TypeID, MVT::i32);
   4705     setValue(&I, Res);
   4706     return 0;
   4707   }
   4708 
   4709   case Intrinsic::eh_return_i32:
   4710   case Intrinsic::eh_return_i64:
   4711     DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
   4712     DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
   4713                             MVT::Other,
   4714                             getControlRoot(),
   4715                             getValue(I.getArgOperand(0)),
   4716                             getValue(I.getArgOperand(1))));
   4717     return 0;
   4718   case Intrinsic::eh_unwind_init:
   4719     DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
   4720     return 0;
   4721   case Intrinsic::eh_dwarf_cfa: {
   4722     SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
   4723                                         TLI.getPointerTy());
   4724     SDValue Offset = DAG.getNode(ISD::ADD, dl,
   4725                                  TLI.getPointerTy(),
   4726                                  DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
   4727                                              TLI.getPointerTy()),
   4728                                  CfaArg);
   4729     SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
   4730                              TLI.getPointerTy(),
   4731                              DAG.getConstant(0, TLI.getPointerTy()));
   4732     setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
   4733                              FA, Offset));
   4734     return 0;
   4735   }
   4736   case Intrinsic::eh_sjlj_callsite: {
   4737     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
   4738     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
   4739     assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
   4740     assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
   4741 
   4742     MMI.setCurrentCallSite(CI->getZExtValue());
   4743     return 0;
   4744   }
   4745   case Intrinsic::eh_sjlj_functioncontext: {
   4746     // Get and store the index of the function context.
   4747     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   4748     AllocaInst *FnCtx =
   4749       cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
   4750     int FI = FuncInfo.StaticAllocaMap[FnCtx];
   4751     MFI->setFunctionContextIndex(FI);
   4752     return 0;
   4753   }
   4754   case Intrinsic::eh_sjlj_setjmp: {
   4755     SDValue Ops[2];
   4756     Ops[0] = getRoot();
   4757     Ops[1] = getValue(I.getArgOperand(0));
   4758     SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
   4759                              DAG.getVTList(MVT::i32, MVT::Other),
   4760                              Ops, 2);
   4761     setValue(&I, Op.getValue(0));
   4762     DAG.setRoot(Op.getValue(1));
   4763     return 0;
   4764   }
   4765   case Intrinsic::eh_sjlj_longjmp: {
   4766     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
   4767                             getRoot(), getValue(I.getArgOperand(0))));
   4768     return 0;
   4769   }
   4770 
   4771   case Intrinsic::x86_mmx_pslli_w:
   4772   case Intrinsic::x86_mmx_pslli_d:
   4773   case Intrinsic::x86_mmx_pslli_q:
   4774   case Intrinsic::x86_mmx_psrli_w:
   4775   case Intrinsic::x86_mmx_psrli_d:
   4776   case Intrinsic::x86_mmx_psrli_q:
   4777   case Intrinsic::x86_mmx_psrai_w:
   4778   case Intrinsic::x86_mmx_psrai_d: {
   4779     SDValue ShAmt = getValue(I.getArgOperand(1));
   4780     if (isa<ConstantSDNode>(ShAmt)) {
   4781       visitTargetIntrinsic(I, Intrinsic);
   4782       return 0;
   4783     }
   4784     unsigned NewIntrinsic = 0;
   4785     EVT ShAmtVT = MVT::v2i32;
   4786     switch (Intrinsic) {
   4787     case Intrinsic::x86_mmx_pslli_w:
   4788       NewIntrinsic = Intrinsic::x86_mmx_psll_w;
   4789       break;
   4790     case Intrinsic::x86_mmx_pslli_d:
   4791       NewIntrinsic = Intrinsic::x86_mmx_psll_d;
   4792       break;
   4793     case Intrinsic::x86_mmx_pslli_q:
   4794       NewIntrinsic = Intrinsic::x86_mmx_psll_q;
   4795       break;
   4796     case Intrinsic::x86_mmx_psrli_w:
   4797       NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
   4798       break;
   4799     case Intrinsic::x86_mmx_psrli_d:
   4800       NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
   4801       break;
   4802     case Intrinsic::x86_mmx_psrli_q:
   4803       NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
   4804       break;
   4805     case Intrinsic::x86_mmx_psrai_w:
   4806       NewIntrinsic = Intrinsic::x86_mmx_psra_w;
   4807       break;
   4808     case Intrinsic::x86_mmx_psrai_d:
   4809       NewIntrinsic = Intrinsic::x86_mmx_psra_d;
   4810       break;
   4811     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
   4812     }
   4813 
   4814     // The vector shift intrinsics with scalars use 32-bit shift amounts but
   4815     // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
   4816     // to be zero.
   4817     // We must do this early because v2i32 is not a legal type.
   4818     DebugLoc dl = getCurDebugLoc();
   4819     SDValue ShOps[2];
   4820     ShOps[0] = ShAmt;
   4821     ShOps[1] = DAG.getConstant(0, MVT::i32);
   4822     ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
   4823     EVT DestVT = TLI.getValueType(I.getType());
   4824     ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
   4825     Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
   4826                        DAG.getConstant(NewIntrinsic, MVT::i32),
   4827                        getValue(I.getArgOperand(0)), ShAmt);
   4828     setValue(&I, Res);
   4829     return 0;
   4830   }
   4831   case Intrinsic::x86_avx_vinsertf128_pd_256:
   4832   case Intrinsic::x86_avx_vinsertf128_ps_256:
   4833   case Intrinsic::x86_avx_vinsertf128_si_256:
   4834   case Intrinsic::x86_avx2_vinserti128: {
   4835     DebugLoc dl = getCurDebugLoc();
   4836     EVT DestVT = TLI.getValueType(I.getType());
   4837     EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
   4838     uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
   4839                    ElVT.getVectorNumElements();
   4840     Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
   4841                       getValue(I.getArgOperand(0)),
   4842                       getValue(I.getArgOperand(1)),
   4843                       DAG.getConstant(Idx, MVT::i32));
   4844     setValue(&I, Res);
   4845     return 0;
   4846   }
   4847   case Intrinsic::convertff:
   4848   case Intrinsic::convertfsi:
   4849   case Intrinsic::convertfui:
   4850   case Intrinsic::convertsif:
   4851   case Intrinsic::convertuif:
   4852   case Intrinsic::convertss:
   4853   case Intrinsic::convertsu:
   4854   case Intrinsic::convertus:
   4855   case Intrinsic::convertuu: {
   4856     ISD::CvtCode Code = ISD::CVT_INVALID;
   4857     switch (Intrinsic) {
   4858     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
   4859     case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
   4860     case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
   4861     case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
   4862     case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
   4863     case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
   4864     case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
   4865     case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
   4866     case Intrinsic::convertus:  Code = ISD::CVT_US; break;
   4867     case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
   4868     }
   4869     EVT DestVT = TLI.getValueType(I.getType());
   4870     const Value *Op1 = I.getArgOperand(0);
   4871     Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
   4872                                DAG.getValueType(DestVT),
   4873                                DAG.getValueType(getValue(Op1).getValueType()),
   4874                                getValue(I.getArgOperand(1)),
   4875                                getValue(I.getArgOperand(2)),
   4876                                Code);
   4877     setValue(&I, Res);
   4878     return 0;
   4879   }
   4880   case Intrinsic::sqrt:
   4881     setValue(&I, DAG.getNode(ISD::FSQRT, dl,
   4882                              getValue(I.getArgOperand(0)).getValueType(),
   4883                              getValue(I.getArgOperand(0))));
   4884     return 0;
   4885   case Intrinsic::powi:
   4886     setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
   4887                             getValue(I.getArgOperand(1)), DAG));
   4888     return 0;
   4889   case Intrinsic::sin:
   4890     setValue(&I, DAG.getNode(ISD::FSIN, dl,
   4891                              getValue(I.getArgOperand(0)).getValueType(),
   4892                              getValue(I.getArgOperand(0))));
   4893     return 0;
   4894   case Intrinsic::cos:
   4895     setValue(&I, DAG.getNode(ISD::FCOS, dl,
   4896                              getValue(I.getArgOperand(0)).getValueType(),
   4897                              getValue(I.getArgOperand(0))));
   4898     return 0;
   4899   case Intrinsic::log:
   4900     visitLog(I);
   4901     return 0;
   4902   case Intrinsic::log2:
   4903     visitLog2(I);
   4904     return 0;
   4905   case Intrinsic::log10:
   4906     visitLog10(I);
   4907     return 0;
   4908   case Intrinsic::exp:
   4909     visitExp(I);
   4910     return 0;
   4911   case Intrinsic::exp2:
   4912     visitExp2(I);
   4913     return 0;
   4914   case Intrinsic::pow:
   4915     visitPow(I);
   4916     return 0;
   4917   case Intrinsic::fma:
   4918     setValue(&I, DAG.getNode(ISD::FMA, dl,
   4919                              getValue(I.getArgOperand(0)).getValueType(),
   4920                              getValue(I.getArgOperand(0)),
   4921                              getValue(I.getArgOperand(1)),
   4922                              getValue(I.getArgOperand(2))));
   4923     return 0;
   4924   case Intrinsic::convert_to_fp16:
   4925     setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
   4926                              MVT::i16, getValue(I.getArgOperand(0))));
   4927     return 0;
   4928   case Intrinsic::convert_from_fp16:
   4929     setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
   4930                              MVT::f32, getValue(I.getArgOperand(0))));
   4931     return 0;
   4932   case Intrinsic::pcmarker: {
   4933     SDValue Tmp = getValue(I.getArgOperand(0));
   4934     DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
   4935     return 0;
   4936   }
   4937   case Intrinsic::readcyclecounter: {
   4938     SDValue Op = getRoot();
   4939     Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
   4940                       DAG.getVTList(MVT::i64, MVT::Other),
   4941                       &Op, 1);
   4942     setValue(&I, Res);
   4943     DAG.setRoot(Res.getValue(1));
   4944     return 0;
   4945   }
   4946   case Intrinsic::bswap:
   4947     setValue(&I, DAG.getNode(ISD::BSWAP, dl,
   4948                              getValue(I.getArgOperand(0)).getValueType(),
   4949                              getValue(I.getArgOperand(0))));
   4950     return 0;
   4951   case Intrinsic::cttz: {
   4952     SDValue Arg = getValue(I.getArgOperand(0));
   4953     ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
   4954     EVT Ty = Arg.getValueType();
   4955     setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
   4956                              dl, Ty, Arg));
   4957     return 0;
   4958   }
   4959   case Intrinsic::ctlz: {
   4960     SDValue Arg = getValue(I.getArgOperand(0));
   4961     ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
   4962     EVT Ty = Arg.getValueType();
   4963     setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
   4964                              dl, Ty, Arg));
   4965     return 0;
   4966   }
   4967   case Intrinsic::ctpop: {
   4968     SDValue Arg = getValue(I.getArgOperand(0));
   4969     EVT Ty = Arg.getValueType();
   4970     setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
   4971     return 0;
   4972   }
   4973   case Intrinsic::stacksave: {
   4974     SDValue Op = getRoot();
   4975     Res = DAG.getNode(ISD::STACKSAVE, dl,
   4976                       DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
   4977     setValue(&I, Res);
   4978     DAG.setRoot(Res.getValue(1));
   4979     return 0;
   4980   }
   4981   case Intrinsic::stackrestore: {
   4982     Res = getValue(I.getArgOperand(0));
   4983     DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
   4984     return 0;
   4985   }
   4986   case Intrinsic::stackprotector: {
   4987     // Emit code into the DAG to store the stack guard onto the stack.
   4988     MachineFunction &MF = DAG.getMachineFunction();
   4989     MachineFrameInfo *MFI = MF.getFrameInfo();
   4990     EVT PtrTy = TLI.getPointerTy();
   4991 
   4992     SDValue Src = getValue(I.getArgOperand(0));   // The guard's value.
   4993     AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
   4994 
   4995     int FI = FuncInfo.StaticAllocaMap[Slot];
   4996     MFI->setStackProtectorIndex(FI);
   4997 
   4998     SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
   4999 
   5000     // Store the stack protector onto the stack.
   5001     Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
   5002                        MachinePointerInfo::getFixedStack(FI),
   5003                        true, false, 0);
   5004     setValue(&I, Res);
   5005     DAG.setRoot(Res);
   5006     return 0;
   5007   }
   5008   case Intrinsic::objectsize: {
   5009     // If we don't know by now, we're never going to know.
   5010     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
   5011 
   5012     assert(CI && "Non-constant type in __builtin_object_size?");
   5013 
   5014     SDValue Arg = getValue(I.getCalledValue());
   5015     EVT Ty = Arg.getValueType();
   5016 
   5017     if (CI->isZero())
   5018       Res = DAG.getConstant(-1ULL, Ty);
   5019     else
   5020       Res = DAG.getConstant(0, Ty);
   5021 
   5022     setValue(&I, Res);
   5023     return 0;
   5024   }
   5025   case Intrinsic::var_annotation:
   5026     // Discard annotate attributes
   5027     return 0;
   5028 
   5029   case Intrinsic::init_trampoline: {
   5030     const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
   5031 
   5032     SDValue Ops[6];
   5033     Ops[0] = getRoot();
   5034     Ops[1] = getValue(I.getArgOperand(0));
   5035     Ops[2] = getValue(I.getArgOperand(1));
   5036     Ops[3] = getValue(I.getArgOperand(2));
   5037     Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
   5038     Ops[5] = DAG.getSrcValue(F);
   5039 
   5040     Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
   5041 
   5042     DAG.setRoot(Res);
   5043     return 0;
   5044   }
   5045   case Intrinsic::adjust_trampoline: {
   5046     setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
   5047                              TLI.getPointerTy(),
   5048                              getValue(I.getArgOperand(0))));
   5049     return 0;
   5050   }
   5051   case Intrinsic::gcroot:
   5052     if (GFI) {
   5053       const Value *Alloca = I.getArgOperand(0);
   5054       const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
   5055 
   5056       FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
   5057       GFI->addStackRoot(FI->getIndex(), TypeMap);
   5058     }
   5059     return 0;
   5060   case Intrinsic::gcread:
   5061   case Intrinsic::gcwrite:
   5062     llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
   5063   case Intrinsic::flt_rounds:
   5064     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
   5065     return 0;
   5066 
   5067   case Intrinsic::expect: {
   5068     // Just replace __builtin_expect(exp, c) with EXP.
   5069     setValue(&I, getValue(I.getArgOperand(0)));
   5070     return 0;
   5071   }
   5072 
   5073   case Intrinsic::trap: {
   5074     StringRef TrapFuncName = TM.Options.getTrapFunctionName();
   5075     if (TrapFuncName.empty()) {
   5076       DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
   5077       return 0;
   5078     }
   5079     TargetLowering::ArgListTy Args;
   5080     std::pair<SDValue, SDValue> Result =
   5081       TLI.LowerCallTo(getRoot(), I.getType(),
   5082                  false, false, false, false, 0, CallingConv::C,
   5083                  /*isTailCall=*/false,
   5084                  /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
   5085                  DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
   5086                  Args, DAG, getCurDebugLoc());
   5087     DAG.setRoot(Result.second);
   5088     return 0;
   5089   }
   5090   case Intrinsic::uadd_with_overflow:
   5091   case Intrinsic::sadd_with_overflow:
   5092   case Intrinsic::usub_with_overflow:
   5093   case Intrinsic::ssub_with_overflow:
   5094   case Intrinsic::umul_with_overflow:
   5095   case Intrinsic::smul_with_overflow: {
   5096     ISD::NodeType Op;
   5097     switch (Intrinsic) {
   5098     default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
   5099     case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
   5100     case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
   5101     case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
   5102     case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
   5103     case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
   5104     case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
   5105     }
   5106     SDValue Op1 = getValue(I.getArgOperand(0));
   5107     SDValue Op2 = getValue(I.getArgOperand(1));
   5108 
   5109     SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
   5110     setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
   5111     return 0;
   5112   }
   5113   case Intrinsic::prefetch: {
   5114     SDValue Ops[5];
   5115     unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
   5116     Ops[0] = getRoot();
   5117     Ops[1] = getValue(I.getArgOperand(0));
   5118     Ops[2] = getValue(I.getArgOperand(1));
   5119     Ops[3] = getValue(I.getArgOperand(2));
   5120     Ops[4] = getValue(I.getArgOperand(3));
   5121     DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
   5122                                         DAG.getVTList(MVT::Other),
   5123                                         &Ops[0], 5,
   5124                                         EVT::getIntegerVT(*Context, 8),
   5125                                         MachinePointerInfo(I.getArgOperand(0)),
   5126                                         0, /* align */
   5127                                         false, /* volatile */
   5128                                         rw==0, /* read */
   5129                                         rw==1)); /* write */
   5130     return 0;
   5131   }
   5132 
   5133   case Intrinsic::invariant_start:
   5134   case Intrinsic::lifetime_start:
   5135     // Discard region information.
   5136     setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
   5137     return 0;
   5138   case Intrinsic::invariant_end:
   5139   case Intrinsic::lifetime_end:
   5140     // Discard region information.
   5141     return 0;
   5142   }
   5143 }
   5144 
   5145 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
   5146                                       bool isTailCall,
   5147                                       MachineBasicBlock *LandingPad) {
          // Lower the call site CS to Callee into the DAG:
          //   1. decide whether the return value must be demoted to a hidden sret
          //      stack slot,
          //   2. build the argument list (plus the hidden slot, if any),
          //   3. when LandingPad is non-null, emit a begin EH label before the call,
          //   4. hand the call to TLI.LowerCallTo,
          //   5. record the call's value with setValue (reloading it from the
          //      demotion slot when one was used),
          //   6. assign an SDNodeOrder and, for non-tail calls, install the call's
          //      output chain as the DAG root,
          //   7. when LandingPad is non-null, emit an end EH label and register the
          //      [begin, end] range with MachineModuleInfo.
          // isTailCall is only a request: it is cleared below when
          // isInTailCallPosition() fails or when TM.Options.EnableFastISel is set.

          // The callee's function type is the pointee type of the called value.
   5148   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   5149   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   5150   Type *RetTy = FTy->getReturnType();
   5151   MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
          // Stays null unless LandingPad is non-null (assigned in that branch below).
   5152   MCSymbol *BeginLabel = 0;
   5153 
          // Entry is a scratch record that is filled in and pushed onto Args once
          // per argument.
   5154   TargetLowering::ArgListTy Args;
   5155   TargetLowering::ArgListEntry Entry;
   5156   Args.reserve(CS.arg_size());
   5157 
   5158   // Check whether the function can return without sret-demotion.
          // Outs and Offsets are filled by GetReturnInfo; they are used again below
          // (one load per Outs entry, at Offsets[i]) if the return is demoted.
   5159   SmallVector<ISD::OutputArg, 4> Outs;
   5160   SmallVector<uint64_t, 4> Offsets;
   5161   GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
   5162                 Outs, TLI, &Offsets);
   5163 
   5164   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
   5165 					   DAG.getMachineFunction(),
   5166 					   FTy->isVarArg(), Outs,
   5167 					   FTy->getContext());
   5168 
          // DemoteStackSlot / DemoteStackIdx are only assigned, and only read, on
          // the !CanLowerReturn paths; -100 is just an initial placeholder value.
   5169   SDValue DemoteStackSlot;
   5170   int DemoteStackIdx = -100;
   5171 
   5172   if (!CanLowerReturn) {
            // The target cannot return this type directly.  Create a stack object
            // with the return type's alloc size and preferred alignment, and pass
            // its frame-index address as the first argument, flagged isSRet.
   5173     uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
   5174                       FTy->getReturnType());
   5175     unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
   5176                       FTy->getReturnType());
   5177     MachineFunction &MF = DAG.getMachineFunction();
   5178     DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
   5179     Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
   5180 
   5181     DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
   5182     Entry.Node = DemoteStackSlot;
   5183     Entry.Ty = StackSlotPtrType;
   5184     Entry.isSExt = false;
   5185     Entry.isZExt = false;
   5186     Entry.isInReg = false;
   5187     Entry.isSRet = true;
   5188     Entry.isNest = false;
   5189     Entry.isByVal = false;
   5190     Entry.Alignment = Align;
   5191     Args.push_back(Entry);
            // The call itself is now lowered as returning void; the real value is
            // read back from the stack slot after the call (see below).
   5192     RetTy = Type::getVoidTy(FTy->getContext());
   5193   }
   5194 
          // Append one entry per IR-level argument.  Every Entry field that was set
          // for the demoted-return entry above is reassigned here, so no state from
          // a previous entry carries over.
   5195   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
   5196        i != e; ++i) {
   5197     const Value *V = *i;
   5198 
   5199     // Skip empty types
   5200     if (V->getType()->isEmptyTy())
   5201       continue;
   5202 
   5203     SDValue ArgNode = getValue(V);
   5204     Entry.Node = ArgNode; Entry.Ty = V->getType();
   5205 
            // Argument attributes are looked up at the 1-based argument position.
            // NOTE(review): index 0 is queried separately in the TLI.LowerCallTo
            // call below, presumably for the return value -- confirm.
   5206     unsigned attrInd = i - CS.arg_begin() + 1;
   5207     Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
   5208     Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
   5209     Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
   5210     Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
   5211     Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
   5212     Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
   5213     Entry.Alignment = CS.getParamAlignment(attrInd);
   5214     Args.push_back(Entry);
   5215   }
   5216 
   5217   if (LandingPad) {
   5218     // Insert a label before the invoke call to mark the try range.  This can be
   5219     // used to detect deletion of the invoke via the MachineModuleInfo.
   5220     BeginLabel = MMI.getContext().CreateTempSymbol();
   5221 
   5222     // For SjLj, keep track of which landing pads go with which invokes
   5223     // so as to maintain the ordering of pads in the LSDA.
            // A call-site index of 0 is treated as "none recorded".
   5224     unsigned CallSiteIndex = MMI.getCurrentCallSite();
   5225     if (CallSiteIndex) {
   5226       MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
   5227       LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);
   5228 
   5229       // Now that the call site is handled, stop tracking it.
   5230       MMI.setCurrentCallSite(0);
   5231     }
   5232 
   5233     // Both PendingLoads and PendingExports must be flushed here;
   5234     // this call might not return.
            // getRoot() is called only for its flushing side effect; the label is
            // then chained on getControlRoot().
   5235     (void)getRoot();
   5236     DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
   5237   }
   5238 
   5239   // Check if target-independent constraints permit a tail call here.
   5240   // Target-dependent constraints are checked within TLI.LowerCallTo.
   5241   if (isTailCall &&
   5242       !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
   5243     isTailCall = false;
   5244 
   5245   // If there's a possibility that fast-isel has already selected some amount
   5246   // of the current basic block, don't emit a tail call.
   5247   if (isTailCall && TM.Options.EnableFastISel)
   5248     isTailCall = false;
   5249 
          // Result.first is the call's value (null when there is none);
          // Result.second is the output chain (null when a tail call was emitted,
          // as the assertions below spell out).
   5250   std::pair<SDValue,SDValue> Result =
   5251     TLI.LowerCallTo(getRoot(), RetTy,
   5252                     CS.paramHasAttr(0, Attribute::SExt),
   5253                     CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
   5254                     CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
   5255                     CS.getCallingConv(),
   5256                     isTailCall,
   5257                     CS.doesNotReturn(),
   5258                     !CS.getInstruction()->use_empty(),
   5259                     Callee, Args, DAG, getCurDebugLoc());
   5260   assert((isTailCall || Result.second.getNode()) &&
   5261          "Non-null chain expected with non-tail call!");
   5262   assert((Result.second.getNode() || !Result.first.getNode()) &&
   5263          "Null value expected with tail call!");
   5264   if (Result.first.getNode()) {
            // The call produced a value directly; map the IR instruction to it.
   5265     setValue(CS.getInstruction(), Result.first);
   5266   } else if (!CanLowerReturn && Result.second.getNode()) {
   5267     // The instruction result is the result of loading from the
   5268     // hidden sret parameter.
   5269     SmallVector<EVT, 1> PVTs;
   5270     Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
   5271 
   5272     ComputeValueVTs(TLI, PtrRetTy, PVTs);
   5273     assert(PVTs.size() == 1 && "Pointers should fit in one register");
   5274     EVT PtrVT = PVTs[0];
   5275     unsigned NumValues = Outs.size();
   5276     SmallVector<SDValue, 4> Values(NumValues);
   5277     SmallVector<SDValue, 4> Chains(NumValues);
   5278 
            // Emit one load per Outs entry from DemoteStackSlot + Offsets[i].  Each
            // load is chained on the call's output chain (Result.second), not on
            // the previous load.
   5279     for (unsigned i = 0; i < NumValues; ++i) {
   5280       SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
   5281                                 DemoteStackSlot,
   5282                                 DAG.getConstant(Offsets[i], PtrVT));
   5283       SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
   5284                               Add,
   5285                   MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
   5286                               false, false, false, 1);
   5287       Values[i] = L;
   5288       Chains[i] = L.getValue(1);
   5289     }
   5290 
            // Join the load chains with a TokenFactor and queue the result in
            // PendingLoads instead of installing it as the root here.
   5291     SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
   5292                                 MVT::Other, &Chains[0], NumValues);
   5293     PendingLoads.push_back(Chain);
   5294 
   5295     // Collect the legal value parts into potentially illegal values
   5296     // that correspond to the original function's return values.
            // RetTy was replaced by void above, so restore the real return type.
            // CurReg walks Values, consuming NumRegs loaded parts per value type.
   5297     SmallVector<EVT, 4> RetTys;
   5298     RetTy = FTy->getReturnType();
   5299     ComputeValueVTs(TLI, RetTy, RetTys);
   5300     ISD::NodeType AssertOp = ISD::DELETED_NODE;
   5301     SmallVector<SDValue, 4> ReturnValues;
   5302     unsigned CurReg = 0;
   5303     for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
   5304       EVT VT = RetTys[I];
   5305       EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
   5306       unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
   5307 
   5308       SDValue ReturnValue =
   5309         getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
   5310                          RegisterVT, VT, AssertOp);
   5311       ReturnValues.push_back(ReturnValue);
   5312       CurReg += NumRegs;
   5313     }
   5314 
            // The instruction's value is a MERGE_VALUES of the reassembled pieces.
   5315     setValue(CS.getInstruction(),
   5316              DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
   5317                          DAG.getVTList(&RetTys[0], RetTys.size()),
   5318                          &ReturnValues[0], ReturnValues.size()));
   5319   }
   5320 
   5321   // Assign order to nodes here. If the call does not produce a result, it won't
   5322   // be mapped to a SDNode and visit() will not assign it an order number.
   5323   if (!Result.second.getNode()) {
   5324     // As a special case, a null chain means that a tail call has been emitted and
   5325     // the DAG root is already updated.
   5326     HasTailCall = true;
   5327     ++SDNodeOrder;
   5328     AssignOrderingToNode(DAG.getRoot().getNode());
   5329   } else {
            // Ordinary call: the call's output chain becomes the new DAG root.
   5330     DAG.setRoot(Result.second);
   5331     ++SDNodeOrder;
   5332     AssignOrderingToNode(Result.second.getNode());
   5333   }
   5334 
   5335   if (LandingPad) {
   5336     // Insert a label at the end of the invoke call to mark the try range.  This
   5337     // can be used to detect deletion of the invoke via the MachineModuleInfo.
   5338     MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
   5339     DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
   5340 
   5341     // Inform MachineModuleInfo of range.
   5342     MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
   5343   }
   5344 }
   5345 
   5346 /// IsOnlyUsedInZeroEqualityComparison - Return true when every use of V is an
   5347 /// equality icmp (eq/ne) whose second operand is a null constant.
   5348 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
   5349   for (Value::const_use_iterator User = V->use_begin(), End = V->use_end();
   5350        User != End; ++User) {
   5351     const ICmpInst *Cmp = dyn_cast<ICmpInst>(*User);
   5352     if (!Cmp || !Cmp->isEquality())
   5353       return false; // Not an equality compare, so the exact value matters.
   5354     const Constant *RHS = dyn_cast<Constant>(Cmp->getOperand(1));
   5355     if (!RHS || !RHS->isNullValue())
   5356       return false; // Compared against something other than zero.
   5357   }
   5358   // Every use (possibly none at all) is an eq/ne comparison against zero.
   5359   return true;
   5360 }
   5361 
   5362 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
   5363                              Type *LoadTy,
   5364                              SelectionDAGBuilder &Builder) {
   5365 
   5366   // Check to see if this load can be trivially constant folded, e.g. if the
   5367   // input is from a string literal.
   5368   if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
   5369     // Cast pointer to the type we really want to load.
   5370     LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
   5371                                          PointerType::getUnqual(LoadTy));
   5372 
   5373     if (const Constant *LoadCst =
   5374           ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
   5375                                        Builder.TD))
   5376       return Builder.getValue(LoadCst);
   5377   }
   5378 
   5379   // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
   5380   // still constant memory, the input chain can be the entry node.
   5381   SDValue Root;
   5382   bool ConstantMemory = false;
   5383 
   5384   // Do not serialize (non-volatile) loads of constant memory with anything.
   5385   if (Builder.AA->pointsToConstantMemory(PtrVal)) {
   5386     Root = Builder.DAG.getEntryNode();
   5387     ConstantMemory = true;
   5388   } else {
   5389     // Do not serialize non-volatile loads against each other.
   5390     Root = Builder.DAG.getRoot();
   5391   }
   5392 
   5393   SDValue Ptr = Builder.getValue(PtrVal);
   5394   SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
   5395                                         Ptr, MachinePointerInfo(PtrVal),
   5396                                         false /*volatile*/,
   5397                                         false /*nontemporal*/,
   5398                                         false /*isinvariant*/, 1 /* align=1 */);
   5399 
   5400   if (!ConstantMemory)
   5401     Builder.PendingLoads.push_back(LoadVal.getValue(1));
   5402   return LoadVal;
   5403 }
   5404 
   5405 
   5406 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
   5407 /// If so, return true and lower it, otherwise return false and it will be
   5408 /// lowered like a normal call.
   5409 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
   5410   // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
   5411   if (I.getNumArgOperands() != 3)
   5412     return false;
   5413 
   5414   const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
   5415   if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
   5416       !I.getArgOperand(2)->getType()->isIntegerTy() ||
   5417       !I.getType()->isIntegerTy())
   5418     return false;
   5419 
   5420   const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
   5421 
   5422   // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
   5423   // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
   5424   if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
   5425     bool ActuallyDoIt = true;
   5426     MVT LoadVT;
   5427     Type *LoadTy;
   5428     switch (Size->getZExtValue()) {
   5429     default:
   5430       LoadVT = MVT::Other;
   5431       LoadTy = 0;
   5432       ActuallyDoIt = false;
   5433       break;
   5434     case 2:
   5435       LoadVT = MVT::i16;
   5436       LoadTy = Type::getInt16Ty(Size->getContext());
   5437       break;
   5438     case 4:
   5439       LoadVT = MVT::i32;
   5440       LoadTy = Type::getInt32Ty(Size->getContext());
   5441       break;
   5442     case 8:
   5443       LoadVT = MVT::i64;
   5444       LoadTy = Type::getInt64Ty(Size->getContext());
   5445       break;
   5446         /*
   5447     case 16:
   5448       LoadVT = MVT::v4i32;
   5449       LoadTy = Type::getInt32Ty(Size->getContext());
   5450       LoadTy = VectorType::get(LoadTy, 4);
   5451       break;
   5452          */
   5453     }
   5454 
   5455     // This turns into unaligned loads.  We only do this if the target natively
   5456     // supports the MVT we'll be loading or if it is small enough (<= 4) that
   5457     // we'll only produce a small number of byte loads.
   5458 
   5459     // Require that we can find a legal MVT, and only do this if the target
   5460     // supports unaligned loads of that type.  Expanding into byte loads would
   5461     // bloat the code.
   5462     if (ActuallyDoIt && Size->getZExtValue() > 4) {
   5463       // TODO: Handle 5 byte compare as 4-byte + 1 byte.
   5464       // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
   5465       if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
   5466         ActuallyDoIt = false;
   5467     }
   5468 
   5469     if (ActuallyDoIt) {
   5470       SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
   5471       SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
   5472 
   5473       SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
   5474                                  ISD::SETNE);
   5475       EVT CallVT = TLI.getValueType(I.getType(), true);
   5476       setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
   5477       return true;
   5478     }
   5479   }
   5480 
   5481 
   5482   return false;
   5483 }
   5484 
   5485 
   5486 void SelectionDAGBuilder::visitCall(const CallInst &I) {
   5487   // Handle inline assembly differently.
   5488   if (isa<InlineAsm>(I.getCalledValue())) {
   5489     visitInlineAsm(&I);
   5490     return;
   5491   }
   5492 
   5493   MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
   5494   ComputeUsesVAFloatArgument(I, &MMI);
   5495 
   5496   const char *RenameFn = 0;
   5497   if (Function *F = I.getCalledFunction()) {
   5498     if (F->isDeclaration()) {
   5499       if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
   5500         if (unsigned IID = II->getIntrinsicID(F)) {
   5501           RenameFn = visitIntrinsicCall(I, IID);
   5502           if (!RenameFn)
   5503             return;
   5504         }
   5505       }
   5506       if (unsigned IID = F->getIntrinsicID()) {
   5507         RenameFn = visitIntrinsicCall(I, IID);
   5508         if (!RenameFn)
   5509           return;
   5510       }
   5511     }
   5512 
   5513     // Check for well-known libc/libm calls.  If the function is internal, it
   5514     // can't be a library call.
   5515     if (!F->hasLocalLinkage() && F->hasName()) {
   5516       StringRef Name = F->getName();
   5517       if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
   5518           (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
   5519           (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
   5520         if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
   5521             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5522             I.getType() == I.getArgOperand(0)->getType() &&
   5523             I.getType() == I.getArgOperand(1)->getType()) {
   5524           SDValue LHS = getValue(I.getArgOperand(0));
   5525           SDValue RHS = getValue(I.getArgOperand(1));
   5526           setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
   5527                                    LHS.getValueType(), LHS, RHS));
   5528           return;
   5529         }
   5530       } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
   5531                  (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
   5532                  (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
   5533         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
   5534             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5535             I.getType() == I.getArgOperand(0)->getType()) {
   5536           SDValue Tmp = getValue(I.getArgOperand(0));
   5537           setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
   5538                                    Tmp.getValueType(), Tmp));
   5539           return;
   5540         }
   5541       } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
   5542                  (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
   5543                  (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
   5544         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
   5545             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5546             I.getType() == I.getArgOperand(0)->getType() &&
   5547             I.onlyReadsMemory()) {
   5548           SDValue Tmp = getValue(I.getArgOperand(0));
   5549           setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
   5550                                    Tmp.getValueType(), Tmp));
   5551           return;
   5552         }
   5553       } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
   5554                  (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
   5555                  (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
   5556         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
   5557             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5558             I.getType() == I.getArgOperand(0)->getType() &&
   5559             I.onlyReadsMemory()) {
   5560           SDValue Tmp = getValue(I.getArgOperand(0));
   5561           setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
   5562                                    Tmp.getValueType(), Tmp));
   5563           return;
   5564         }
   5565       } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
   5566                  (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
   5567                  (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
   5568         if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
   5569             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5570             I.getType() == I.getArgOperand(0)->getType() &&
   5571             I.onlyReadsMemory()) {
   5572           SDValue Tmp = getValue(I.getArgOperand(0));
   5573           setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
   5574                                    Tmp.getValueType(), Tmp));
   5575           return;
   5576         }
   5577       } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
   5578                  (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
   5579                  (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
   5580         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5581             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5582             I.getType() == I.getArgOperand(0)->getType()) {
   5583           SDValue Tmp = getValue(I.getArgOperand(0));
   5584           setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
   5585                                    Tmp.getValueType(), Tmp));
   5586           return;
   5587         }
   5588       } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
   5589                  (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
   5590                  (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
   5591         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5592             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5593             I.getType() == I.getArgOperand(0)->getType()) {
   5594           SDValue Tmp = getValue(I.getArgOperand(0));
   5595           setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
   5596                                    Tmp.getValueType(), Tmp));
   5597           return;
   5598         }
   5599       } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
   5600                  (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
   5601                  (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
   5602         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5603             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5604             I.getType() == I.getArgOperand(0)->getType()) {
   5605           SDValue Tmp = getValue(I.getArgOperand(0));
   5606           setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
   5607                                    Tmp.getValueType(), Tmp));
   5608           return;
   5609         }
   5610       } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
   5611                  (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
   5612                  (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
   5613         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5614             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5615             I.getType() == I.getArgOperand(0)->getType()) {
   5616           SDValue Tmp = getValue(I.getArgOperand(0));
   5617           setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
   5618                                    Tmp.getValueType(), Tmp));
   5619           return;
   5620         }
   5621       } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
   5622                  (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
   5623                  (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
   5624         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5625             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5626             I.getType() == I.getArgOperand(0)->getType()) {
   5627           SDValue Tmp = getValue(I.getArgOperand(0));
   5628           setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
   5629                                    Tmp.getValueType(), Tmp));
   5630           return;
   5631         }
   5632       } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
   5633                  (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
   5634                  (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
   5635         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5636             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5637             I.getType() == I.getArgOperand(0)->getType() &&
   5638             I.onlyReadsMemory()) {
   5639           SDValue Tmp = getValue(I.getArgOperand(0));
   5640           setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
   5641                                    Tmp.getValueType(), Tmp));
   5642           return;
   5643         }
   5644       } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
   5645                  (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
   5646                  (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
   5647         if (I.getNumArgOperands() == 1 && // Basic sanity checks.
   5648             I.getArgOperand(0)->getType()->isFloatingPointTy() &&
   5649             I.getType() == I.getArgOperand(0)->getType() &&
   5650             I.onlyReadsMemory()) {
   5651           SDValue Tmp = getValue(I.getArgOperand(0));
   5652           setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
   5653                                    Tmp.getValueType(), Tmp));
   5654           return;
   5655         }
   5656       } else if (Name == "memcmp") {
   5657         if (visitMemCmpCall(I))
   5658           return;
   5659       }
   5660     }
   5661   }
   5662 
   5663   SDValue Callee;
   5664   if (!RenameFn)
   5665     Callee = getValue(I.getCalledValue());
   5666   else
   5667     Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
   5668 
   5669   // Check if we can potentially perform a tail call. More detailed checking is
   5670   // be done within LowerCallTo, after more information about the call is known.
   5671   LowerCallTo(&I, Callee, I.isTailCall());
   5672 }
   5673 
   5674 namespace {
   5675 
   5676 /// AsmOperandInfo - This contains information for each constraint that we are
   5677 /// lowering.
   5678 class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
   5679 public:
   5680   /// CallOperand - If this is the result output operand or a clobber
   5681   /// this is null, otherwise it is the incoming operand to the CallInst.
   5682   /// This gets modified as the asm is processed.
   5683   SDValue CallOperand;
   5684 
   5685   /// AssignedRegs - If this is a register or register class operand, this
   5686   /// contains the set of register corresponding to the operand.
   5687   RegsForValue AssignedRegs;
   5688 
   5689   explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
   5690     : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
   5691   }
   5692 
   5693   /// getCallOperandValEVT - Return the EVT of the Value* that this operand
   5694   /// corresponds to.  If there is no Value* for this operand, it returns
   5695   /// MVT::Other.
   5696   EVT getCallOperandValEVT(LLVMContext &Context,
   5697                            const TargetLowering &TLI,
   5698                            const TargetData *TD) const {
   5699     if (CallOperandVal == 0) return MVT::Other;
   5700 
   5701     if (isa<BasicBlock>(CallOperandVal))
   5702       return TLI.getPointerTy();
   5703 
   5704     llvm::Type *OpTy = CallOperandVal->getType();
   5705 
   5706     // FIXME: code duplicated from TargetLowering::ParseConstraints().
   5707     // If this is an indirect operand, the operand is a pointer to the
   5708     // accessed type.
   5709     if (isIndirect) {
   5710       llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
   5711       if (!PtrTy)
   5712         report_fatal_error("Indirect operand for inline asm not a pointer!");
   5713       OpTy = PtrTy->getElementType();
   5714     }
   5715 
   5716     // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
   5717     if (StructType *STy = dyn_cast<StructType>(OpTy))
   5718       if (STy->getNumElements() == 1)
   5719         OpTy = STy->getElementType(0);
   5720 
   5721     // If OpTy is not a single value, it may be a struct/union that we
   5722     // can tile with integers.
   5723     if (!OpTy->isSingleValueType() && OpTy->isSized()) {
   5724       unsigned BitSize = TD->getTypeSizeInBits(OpTy);
   5725       switch (BitSize) {
   5726       default: break;
   5727       case 1:
   5728       case 8:
   5729       case 16:
   5730       case 32:
   5731       case 64:
   5732       case 128:
   5733         OpTy = IntegerType::get(Context, BitSize);
   5734         break;
   5735       }
   5736     }
   5737 
   5738     return TLI.getValueType(OpTy, true);
   5739   }
   5740 };
   5741 
   5742 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
   5743 
   5744 } // end anonymous namespace
   5745 
   5746 /// GetRegistersForValue - Assign registers (virtual or physical) for the
   5747 /// specified operand.  We prefer to assign virtual registers, to allow the
   5748 /// register allocator to handle the assignment process.  However, if the asm
   5749 /// uses features that we can't model on machineinstrs, we have SDISel do the
   5750 /// allocation.  This produces generally horrible, but correct, code.
   5751 ///
   5752 ///   OpInfo describes the operand.
   5753 ///
   5754 static void GetRegistersForValue(SelectionDAG &DAG,
   5755                                  const TargetLowering &TLI,
   5756                                  DebugLoc DL,
   5757                                  SDISelAsmOperandInfo &OpInfo) {
   5758   LLVMContext &Context = *DAG.getContext();
   5759 
   5760   MachineFunction &MF = DAG.getMachineFunction();
   5761   SmallVector<unsigned, 4> Regs;
   5762 
   5763   // If this is a constraint for a single physreg, or a constraint for a
   5764   // register class, find it.
   5765   std::pair<unsigned, const TargetRegisterClass*> PhysReg =
   5766     TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
   5767                                      OpInfo.ConstraintVT);
   5768 
   5769   unsigned NumRegs = 1;
   5770   if (OpInfo.ConstraintVT != MVT::Other) {
   5771     // If this is a FP input in an integer register (or visa versa) insert a bit
   5772     // cast of the input value.  More generally, handle any case where the input
   5773     // value disagrees with the register class we plan to stick this in.
   5774     if (OpInfo.Type == InlineAsm::isInput &&
   5775         PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
   5776       // Try to convert to the first EVT that the reg class contains.  If the
   5777       // types are identical size, use a bitcast to convert (e.g. two differing
   5778       // vector types).
   5779       EVT RegVT = *PhysReg.second->vt_begin();
   5780       if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
   5781         OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
   5782                                          RegVT, OpInfo.CallOperand);
   5783         OpInfo.ConstraintVT = RegVT;
   5784       } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
   5785         // If the input is a FP value and we want it in FP registers, do a
   5786         // bitcast to the corresponding integer type.  This turns an f64 value
   5787         // into i64, which can be passed with two i32 values on a 32-bit
   5788         // machine.
   5789         RegVT = EVT::getIntegerVT(Context,
   5790                                   OpInfo.ConstraintVT.getSizeInBits());
   5791         OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
   5792                                          RegVT, OpInfo.CallOperand);
   5793         OpInfo.ConstraintVT = RegVT;
   5794       }
   5795     }
   5796 
   5797     NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
   5798   }
   5799 
   5800   EVT RegVT;
   5801   EVT ValueVT = OpInfo.ConstraintVT;
   5802 
   5803   // If this is a constraint for a specific physical register, like {r17},
   5804   // assign it now.
   5805   if (unsigned AssignedReg = PhysReg.first) {
   5806     const TargetRegisterClass *RC = PhysReg.second;
   5807     if (OpInfo.ConstraintVT == MVT::Other)
   5808       ValueVT = *RC->vt_begin();
   5809 
   5810     // Get the actual register value type.  This is important, because the user
   5811     // may have asked for (e.g.) the AX register in i32 type.  We need to
   5812     // remember that AX is actually i16 to get the right extension.
   5813     RegVT = *RC->vt_begin();
   5814 
   5815     // This is a explicit reference to a physical register.
   5816     Regs.push_back(AssignedReg);
   5817 
   5818     // If this is an expanded reference, add the rest of the regs to Regs.
   5819     if (NumRegs != 1) {
   5820       TargetRegisterClass::iterator I = RC->begin();
   5821       for (; *I != AssignedReg; ++I)
   5822         assert(I != RC->end() && "Didn't find reg!");
   5823 
   5824       // Already added the first reg.
   5825       --NumRegs; ++I;
   5826       for (; NumRegs; --NumRegs, ++I) {
   5827         assert(I != RC->end() && "Ran out of registers to allocate!");
   5828         Regs.push_back(*I);
   5829       }
   5830     }
   5831 
   5832     OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
   5833     return;
   5834   }
   5835 
   5836   // Otherwise, if this was a reference to an LLVM register class, create vregs
   5837   // for this reference.
   5838   if (const TargetRegisterClass *RC = PhysReg.second) {
   5839     RegVT = *RC->vt_begin();
   5840     if (OpInfo.ConstraintVT == MVT::Other)
   5841       ValueVT = RegVT;
   5842 
   5843     // Create the appropriate number of virtual registers.
   5844     MachineRegisterInfo &RegInfo = MF.getRegInfo();
   5845     for (; NumRegs; --NumRegs)
   5846       Regs.push_back(RegInfo.createVirtualRegister(RC));
   5847 
   5848     OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
   5849     return;
   5850   }
   5851 
   5852   // Otherwise, we couldn't allocate enough registers for this.
   5853 }
   5854 
   5855 /// visitInlineAsm - Handle a call to an InlineAsm object.
   5856 ///
   5857 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   5858   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
   5859 
   5860   /// ConstraintOperands - Information about all of the constraints.
   5861   SDISelAsmOperandInfoVector ConstraintOperands;
   5862 
   5863   TargetLowering::AsmOperandInfoVector
   5864     TargetConstraints = TLI.ParseConstraints(CS);
   5865 
   5866   bool hasMemory = false;
   5867 
   5868   unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
   5869   unsigned ResNo = 0;   // ResNo - The result number of the next output.
   5870   for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
   5871     ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
   5872     SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
   5873 
   5874     EVT OpVT = MVT::Other;
   5875 
   5876     // Compute the value type for each operand.
   5877     switch (OpInfo.Type) {
   5878     case InlineAsm::isOutput:
   5879       // Indirect outputs just consume an argument.
   5880       if (OpInfo.isIndirect) {
   5881         OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
   5882         break;
   5883       }
   5884 
   5885       // The return value of the call is this value.  As such, there is no
   5886       // corresponding argument.
   5887       assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
   5888       if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
   5889         OpVT = TLI.getValueType(STy->getElementType(ResNo));
   5890       } else {
   5891         assert(ResNo == 0 && "Asm only has one result!");
   5892         OpVT = TLI.getValueType(CS.getType());
   5893       }
   5894       ++ResNo;
   5895       break;
   5896     case InlineAsm::isInput:
   5897       OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
   5898       break;
   5899     case InlineAsm::isClobber:
   5900       // Nothing to do.
   5901       break;
   5902     }
   5903 
   5904     // If this is an input or an indirect output, process the call argument.
   5905     // BasicBlocks are labels, currently appearing only in asm's.
   5906     if (OpInfo.CallOperandVal) {
   5907       if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
   5908         OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
   5909       } else {
   5910         OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
   5911       }
   5912 
   5913       OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
   5914     }
   5915 
   5916     OpInfo.ConstraintVT = OpVT;
   5917 
   5918     // Indirect operand accesses access memory.
   5919     if (OpInfo.isIndirect)
   5920       hasMemory = true;
   5921     else {
   5922       for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
   5923         TargetLowering::ConstraintType
   5924           CType = TLI.getConstraintType(OpInfo.Codes[j]);
   5925         if (CType == TargetLowering::C_Memory) {
   5926           hasMemory = true;
   5927           break;
   5928         }
   5929       }
   5930     }
   5931   }
   5932 
   5933   SDValue Chain, Flag;
   5934 
   5935   // We won't need to flush pending loads if this asm doesn't touch
   5936   // memory and is nonvolatile.
   5937   if (hasMemory || IA->hasSideEffects())
   5938     Chain = getRoot();
   5939   else
   5940     Chain = DAG.getRoot();
   5941 
   5942   // Second pass over the constraints: compute which constraint option to use
   5943   // and assign registers to constraints that want a specific physreg.
   5944   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
   5945     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
   5946 
   5947     // If this is an output operand with a matching input operand, look up the
   5948     // matching input. If their types mismatch, e.g. one is an integer, the
   5949     // other is floating point, or their sizes are different, flag it as an
   5950     // error.
   5951     if (OpInfo.hasMatchingInput()) {
   5952       SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
   5953 
   5954       if (OpInfo.ConstraintVT != Input.ConstraintVT) {
   5955 	std::pair<unsigned, const TargetRegisterClass*> MatchRC =
   5956 	  TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
   5957                                            OpInfo.ConstraintVT);
   5958 	std::pair<unsigned, const TargetRegisterClass*> InputRC =
   5959 	  TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
   5960                                            Input.ConstraintVT);
   5961         if ((OpInfo.ConstraintVT.isInteger() !=
   5962              Input.ConstraintVT.isInteger()) ||
   5963             (MatchRC.second != InputRC.second)) {
   5964           report_fatal_error("Unsupported asm: input constraint"
   5965                              " with a matching output constraint of"
   5966                              " incompatible type!");
   5967         }
   5968         Input.ConstraintVT = OpInfo.ConstraintVT;
   5969       }
   5970     }
   5971 
   5972     // Compute the constraint code and ConstraintType to use.
   5973     TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
   5974 
   5975     // If this is a memory input, and if the operand is not indirect, do what we
   5976     // need to to provide an address for the memory input.
   5977     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
   5978         !OpInfo.isIndirect) {
   5979       assert((OpInfo.isMultipleAlternative ||
   5980               (OpInfo.Type == InlineAsm::isInput)) &&
   5981              "Can only indirectify direct input operands!");
   5982 
   5983       // Memory operands really want the address of the value.  If we don't have
   5984       // an indirect input, put it in the constpool if we can, otherwise spill
   5985       // it to a stack slot.
   5986       // TODO: This isn't quite right. We need to handle these according to
   5987       // the addressing mode that the constraint wants. Also, this may take
   5988       // an additional register for the computation and we don't want that
   5989       // either.
   5990 
   5991       // If the operand is a float, integer, or vector constant, spill to a
   5992       // constant pool entry to get its address.
   5993       const Value *OpVal = OpInfo.CallOperandVal;
   5994       if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
   5995           isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
   5996         OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
   5997                                                  TLI.getPointerTy());
   5998       } else {
   5999         // Otherwise, create a stack slot and emit a store to it before the
   6000         // asm.
   6001         Type *Ty = OpVal->getType();
   6002         uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
   6003         unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
   6004         MachineFunction &MF = DAG.getMachineFunction();
   6005         int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
   6006         SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
   6007         Chain = DAG.getStore(Chain, getCurDebugLoc(),
   6008                              OpInfo.CallOperand, StackSlot,
   6009                              MachinePointerInfo::getFixedStack(SSFI),
   6010                              false, false, 0);
   6011         OpInfo.CallOperand = StackSlot;
   6012       }
   6013 
   6014       // There is no longer a Value* corresponding to this operand.
   6015       OpInfo.CallOperandVal = 0;
   6016 
   6017       // It is now an indirect operand.
   6018       OpInfo.isIndirect = true;
   6019     }
   6020 
   6021     // If this constraint is for a specific register, allocate it before
   6022     // anything else.
   6023     if (OpInfo.ConstraintType == TargetLowering::C_Register)
   6024       GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
   6025   }
   6026 
   6027   // Second pass - Loop over all of the operands, assigning virtual or physregs
   6028   // to register class operands.
   6029   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
   6030     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
   6031 
   6032     // C_Register operands have already been allocated, Other/Memory don't need
   6033     // to be.
   6034     if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
   6035       GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
   6036   }
   6037 
   6038   // AsmNodeOperands - The operands for the ISD::INLINEASM node.
   6039   std::vector<SDValue> AsmNodeOperands;
   6040   AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
   6041   AsmNodeOperands.push_back(
   6042           DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
   6043                                       TLI.getPointerTy()));
   6044 
   6045   // If we have a !srcloc metadata node associated with it, we want to attach
   6046   // this to the ultimately generated inline asm machineinstr.  To do this, we
   6047   // pass in the third operand as this (potentially null) inline asm MDNode.
   6048   const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
   6049   AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
   6050 
   6051   // Remember the HasSideEffect and AlignStack bits as operand 3.
   6052   unsigned ExtraInfo = 0;
   6053   if (IA->hasSideEffects())
   6054     ExtraInfo |= InlineAsm::Extra_HasSideEffects;
   6055   if (IA->isAlignStack())
   6056     ExtraInfo |= InlineAsm::Extra_IsAlignStack;
   6057   AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
   6058                                                   TLI.getPointerTy()));
   6059 
   6060   // Loop over all of the inputs, copying the operand values into the
   6061   // appropriate registers and processing the output regs.
   6062   RegsForValue RetValRegs;
   6063 
   6064   // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
   6065   std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
   6066 
   6067   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
   6068     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
   6069 
   6070     switch (OpInfo.Type) {
   6071     case InlineAsm::isOutput: {
   6072       if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
   6073           OpInfo.ConstraintType != TargetLowering::C_Register) {
   6074         // Memory output, or 'other' output (e.g. 'X' constraint).
   6075         assert(OpInfo.isIndirect && "Memory output must be indirect operand");
   6076 
   6077         // Add information to the INLINEASM node to know about this output.
   6078         unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
   6079         AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
   6080                                                         TLI.getPointerTy()));
   6081         AsmNodeOperands.push_back(OpInfo.CallOperand);
   6082         break;
   6083       }
   6084 
   6085       // Otherwise, this is a register or register class output.
   6086 
   6087       // Copy the output from the appropriate register.  Find a register that
   6088       // we can use.
   6089       if (OpInfo.AssignedRegs.Regs.empty()) {
   6090         LLVMContext &Ctx = *DAG.getContext();
   6091         Ctx.emitError(CS.getInstruction(),
   6092                       "couldn't allocate output register for constraint '" +
   6093                            Twine(OpInfo.ConstraintCode) + "'");
   6094         break;
   6095       }
   6096 
   6097       // If this is an indirect operand, store through the pointer after the
   6098       // asm.
   6099       if (OpInfo.isIndirect) {
   6100         IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
   6101                                                       OpInfo.CallOperandVal));
   6102       } else {
   6103         // This is the result value of the call.
   6104         assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
   6105         // Concatenate this output onto the outputs list.
   6106         RetValRegs.append(OpInfo.AssignedRegs);
   6107       }
   6108 
   6109       // Add information to the INLINEASM node to know that this register is
   6110       // set.
   6111       OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
   6112                                            InlineAsm::Kind_RegDefEarlyClobber :
   6113                                                InlineAsm::Kind_RegDef,
   6114                                                false,
   6115                                                0,
   6116                                                DAG,
   6117                                                AsmNodeOperands);
   6118       break;
   6119     }
   6120     case InlineAsm::isInput: {
   6121       SDValue InOperandVal = OpInfo.CallOperand;
   6122 
   6123       if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
   6124         // If this is required to match an output register we have already set,
   6125         // just use its register.
   6126         unsigned OperandNo = OpInfo.getMatchedOperand();
   6127 
   6128         // Scan until we find the definition we already emitted of this operand.
   6129         // When we find it, create a RegsForValue operand.
   6130         unsigned CurOp = InlineAsm::Op_FirstOperand;
   6131         for (; OperandNo; --OperandNo) {
   6132           // Advance to the next operand.
   6133           unsigned OpFlag =
   6134             cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
   6135           assert((InlineAsm::isRegDefKind(OpFlag) ||
   6136                   InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
   6137                   InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
   6138           CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
   6139         }
   6140 
   6141         unsigned OpFlag =
   6142           cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
   6143         if (InlineAsm::isRegDefKind(OpFlag) ||
   6144             InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
   6145           // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
   6146           if (OpInfo.isIndirect) {
   6147             // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
   6148             LLVMContext &Ctx = *DAG.getContext();
   6149             Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
   6150                           " don't know how to handle tied "
   6151                           "indirect register inputs");
   6152           }
   6153 
   6154           RegsForValue MatchedRegs;
   6155           MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
   6156           EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
   6157           MatchedRegs.RegVTs.push_back(RegVT);
   6158           MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
   6159           for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
   6160                i != e; ++i)
   6161             MatchedRegs.Regs.push_back
   6162               (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
   6163 
   6164           // Copy the input into the MatchedRegs vregs and record the tied use.
   6165           MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
   6166                                     Chain, &Flag);
   6167           MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
   6168                                            true, OpInfo.getMatchedOperand(),
   6169                                            DAG, AsmNodeOperands);
   6170           break;
   6171         }
   6172 
   6173         assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
   6174         assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
   6175                "Unexpected number of operands");
   6176         // Add information to the INLINEASM node to know about this input.
   6177         // See InlineAsm.h isUseOperandTiedToDef.
   6178         OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
   6179                                                     OpInfo.getMatchedOperand());
   6180         AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
   6181                                                         TLI.getPointerTy()));
   6182         AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
   6183         break;
   6184       }
   6185 
   6186       // Treat indirect 'X' constraint as memory.
   6187       if (OpInfo.ConstraintType == TargetLowering::C_Other &&
   6188           OpInfo.isIndirect)
   6189         OpInfo.ConstraintType = TargetLowering::C_Memory;
   6190 
   6191       if (OpInfo.ConstraintType == TargetLowering::C_Other) {
   6192         std::vector<SDValue> Ops;
   6193         TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
   6194                                          Ops, DAG);
   6195         if (Ops.empty()) {
   6196           LLVMContext &Ctx = *DAG.getContext();
   6197           Ctx.emitError(CS.getInstruction(),
   6198                         "invalid operand for inline asm constraint '" +
   6199                         Twine(OpInfo.ConstraintCode) + "'");
   6200           break;
   6201         }
   6202 
   6203         // Add information to the INLINEASM node to know about this input.
   6204         unsigned ResOpType =
   6205           InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
   6206         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
   6207                                                         TLI.getPointerTy()));
   6208         AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
   6209         break;
   6210       }
   6211 
   6212       if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
   6213         assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
   6214         assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
   6215                "Memory operands expect pointer values");
   6216 
   6217         // Add information to the INLINEASM node to know about this input.
   6218         unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
   6219         AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
   6220                                                         TLI.getPointerTy()));
   6221         AsmNodeOperands.push_back(InOperandVal);
   6222         break;
   6223       }
   6224 
   6225       assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
   6226               OpInfo.ConstraintType == TargetLowering::C_Register) &&
   6227              "Unknown constraint type!");
   6228       assert(!OpInfo.isIndirect &&
   6229              "Don't know how to handle indirect register inputs yet!");
   6230 
   6231       // Copy the input into the appropriate registers.
   6232       if (OpInfo.AssignedRegs.Regs.empty()) {
   6233         LLVMContext &Ctx = *DAG.getContext();
   6234         Ctx.emitError(CS.getInstruction(),
   6235                       "couldn't allocate input reg for constraint '" +
   6236                            Twine(OpInfo.ConstraintCode) + "'");
   6237         break;
   6238       }
   6239 
   6240       OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
   6241                                         Chain, &Flag);
   6242 
   6243       OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
   6244                                                DAG, AsmNodeOperands);
   6245       break;
   6246     }
   6247     case InlineAsm::isClobber: {
   6248       // Add the clobbered value to the operand list, so that the register
   6249       // allocator is aware that the physreg got clobbered.
   6250       if (!OpInfo.AssignedRegs.Regs.empty())
   6251         OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
   6252                                                  false, 0, DAG,
   6253                                                  AsmNodeOperands);
   6254       break;
   6255     }
   6256     }
   6257   }
   6258 
   6259   // Finish up input operands.  Set the input chain and add the flag last.
   6260   AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
   6261   if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
   6262 
   6263   Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
   6264                       DAG.getVTList(MVT::Other, MVT::Glue),
   6265                       &AsmNodeOperands[0], AsmNodeOperands.size());
   6266   Flag = Chain.getValue(1);
   6267 
   6268   // If this asm returns a register value, copy the result from that register
   6269   // and set it as the value of the call.
   6270   if (!RetValRegs.Regs.empty()) {
   6271     SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
   6272                                              Chain, &Flag);
   6273 
   6274     // FIXME: Why don't we do this for inline asms with MRVs?
   6275     if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
   6276       EVT ResultType = TLI.getValueType(CS.getType());
   6277 
   6278       // If any of the results of the inline asm is a vector, it may have the
   6279       // wrong width/num elts.  This can happen for register classes that can
   6280       // contain multiple different value types.  The preg or vreg allocated may
   6281       // not have the same VT as was expected.  Convert it to the right type
   6282       // with bit_convert.
   6283       if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
   6284         Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
   6285                           ResultType, Val);
   6286 
   6287       } else if (ResultType != Val.getValueType() &&
   6288                  ResultType.isInteger() && Val.getValueType().isInteger()) {
   6289         // If a result value was tied to an input value, the computed result may
   6290         // have a wider width than the expected result.  Extract the relevant
   6291         // portion.
   6292         Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
   6293       }
   6294 
   6295       assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
   6296     }
   6297 
   6298     setValue(CS.getInstruction(), Val);
   6299     // Don't need to use this as a chain in this case.
   6300     if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
   6301       return;
   6302   }
   6303 
   6304   std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
   6305 
   6306   // Process indirect outputs, first output all of the flagged copies out of
   6307   // physregs.
   6308   for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
   6309     RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
   6310     const Value *Ptr = IndirectStoresToEmit[i].second;
   6311     SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
   6312                                              Chain, &Flag);
   6313     StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
   6314   }
   6315 
   6316   // Emit the non-flagged stores from the physregs.
   6317   SmallVector<SDValue, 8> OutChains;
   6318   for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
   6319     SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
   6320                                StoresToEmit[i].first,
   6321                                getValue(StoresToEmit[i].second),
   6322                                MachinePointerInfo(StoresToEmit[i].second),
   6323                                false, false, 0);
   6324     OutChains.push_back(Val);
   6325   }
   6326 
   6327   if (!OutChains.empty())
   6328     Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
   6329                         &OutChains[0], OutChains.size());
   6330 
   6331   DAG.setRoot(Chain);
   6332 }
   6333 
   6334 void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
   6335   DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
   6336                           MVT::Other, getRoot(),
   6337                           getValue(I.getArgOperand(0)),
   6338                           DAG.getSrcValue(I.getArgOperand(0))));
   6339 }
   6340 
   6341 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   6342   const TargetData &TD = *TLI.getTargetData();
   6343   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
   6344                            getRoot(), getValue(I.getOperand(0)),
   6345                            DAG.getSrcValue(I.getOperand(0)),
   6346                            TD.getABITypeAlignment(I.getType()));
   6347   setValue(&I, V);
   6348   DAG.setRoot(V.getValue(1));
   6349 }
   6350 
   6351 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
   6352   DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
   6353                           MVT::Other, getRoot(),
   6354                           getValue(I.getArgOperand(0)),
   6355                           DAG.getSrcValue(I.getArgOperand(0))));
   6356 }
   6357 
   6358 void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
   6359   DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
   6360                           MVT::Other, getRoot(),
   6361                           getValue(I.getArgOperand(0)),
   6362                           getValue(I.getArgOperand(1)),
   6363                           DAG.getSrcValue(I.getArgOperand(0)),
   6364                           DAG.getSrcValue(I.getArgOperand(1))));
   6365 }
   6366 
   6367 /// TargetLowering::LowerCallTo - This is the default LowerCallTo
   6368 /// implementation, which just calls LowerCall.
   6369 /// FIXME: When all targets are
   6370 /// migrated to using LowerCall, this hook should be integrated into SDISel.
   6371 std::pair<SDValue, SDValue>
   6372 TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
   6373                             bool RetSExt, bool RetZExt, bool isVarArg,
   6374                             bool isInreg, unsigned NumFixedArgs,
   6375                             CallingConv::ID CallConv, bool isTailCall,
   6376                             bool doesNotRet, bool isReturnValueUsed,
   6377                             SDValue Callee,
   6378                             ArgListTy &Args, SelectionDAG &DAG,
   6379                             DebugLoc dl) const {
   6380   // Handle all of the outgoing arguments.
   6381   SmallVector<ISD::OutputArg, 32> Outs;
   6382   SmallVector<SDValue, 32> OutVals;
   6383   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
   6384     SmallVector<EVT, 4> ValueVTs;
   6385     ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
   6386     for (unsigned Value = 0, NumValues = ValueVTs.size();
   6387          Value != NumValues; ++Value) {
   6388       EVT VT = ValueVTs[Value];
   6389       Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
   6390       SDValue Op = SDValue(Args[i].Node.getNode(),
   6391                            Args[i].Node.getResNo() + Value);
   6392       ISD::ArgFlagsTy Flags;
   6393       unsigned OriginalAlignment =
   6394         getTargetData()->getABITypeAlignment(ArgTy);
   6395 
   6396       if (Args[i].isZExt)
   6397         Flags.setZExt();
   6398       if (Args[i].isSExt)
   6399         Flags.setSExt();
   6400       if (Args[i].isInReg)
   6401         Flags.setInReg();
   6402       if (Args[i].isSRet)
   6403         Flags.setSRet();
   6404       if (Args[i].isByVal) {
   6405         Flags.setByVal();
   6406         PointerType *Ty = cast<PointerType>(Args[i].Ty);
   6407         Type *ElementTy = Ty->getElementType();
   6408         Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
   6409         // For ByVal, alignment should come from FE.  BE will guess if this
   6410         // info is not there but there are cases it cannot get right.
   6411         unsigned FrameAlign;
   6412         if (Args[i].Alignment)
   6413           FrameAlign = Args[i].Alignment;
   6414         else
   6415           FrameAlign = getByValTypeAlignment(ElementTy);
   6416         Flags.setByValAlign(FrameAlign);
   6417       }
   6418       if (Args[i].isNest)
   6419         Flags.setNest();
   6420       Flags.setOrigAlign(OriginalAlignment);
   6421 
   6422       EVT PartVT = getRegisterType(RetTy->getContext(), VT);
   6423       unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
   6424       SmallVector<SDValue, 4> Parts(NumParts);
   6425       ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
   6426 
   6427       if (Args[i].isSExt)
   6428         ExtendKind = ISD::SIGN_EXTEND;
   6429       else if (Args[i].isZExt)
   6430         ExtendKind = ISD::ZERO_EXTEND;
   6431 
   6432       getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
   6433                      PartVT, ExtendKind);
   6434 
   6435       for (unsigned j = 0; j != NumParts; ++j) {
   6436         // if it isn't first piece, alignment must be 1
   6437         ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
   6438                                i < NumFixedArgs);
   6439         if (NumParts > 1 && j == 0)
   6440           MyFlags.Flags.setSplit();
   6441         else if (j != 0)
   6442           MyFlags.Flags.setOrigAlign(1);
   6443 
   6444         Outs.push_back(MyFlags);
   6445         OutVals.push_back(Parts[j]);
   6446       }
   6447     }
   6448   }
   6449 
   6450   // Handle the incoming return values from the call.
   6451   SmallVector<ISD::InputArg, 32> Ins;
   6452   SmallVector<EVT, 4> RetTys;
   6453   ComputeValueVTs(*this, RetTy, RetTys);
   6454   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
   6455     EVT VT = RetTys[I];
   6456     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
   6457     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
   6458     for (unsigned i = 0; i != NumRegs; ++i) {
   6459       ISD::InputArg MyFlags;
   6460       MyFlags.VT = RegisterVT.getSimpleVT();
   6461       MyFlags.Used = isReturnValueUsed;
   6462       if (RetSExt)
   6463         MyFlags.Flags.setSExt();
   6464       if (RetZExt)
   6465         MyFlags.Flags.setZExt();
   6466       if (isInreg)
   6467         MyFlags.Flags.setInReg();
   6468       Ins.push_back(MyFlags);
   6469     }
   6470   }
   6471 
   6472   SmallVector<SDValue, 4> InVals;
   6473   Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
   6474                     Outs, OutVals, Ins, dl, DAG, InVals);
   6475 
   6476   // Verify that the target's LowerCall behaved as expected.
   6477   assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
   6478          "LowerCall didn't return a valid chain!");
   6479   assert((!isTailCall || InVals.empty()) &&
   6480          "LowerCall emitted a return value for a tail call!");
   6481   assert((isTailCall || InVals.size() == Ins.size()) &&
   6482          "LowerCall didn't emit the correct number of values!");
   6483 
   6484   // For a tail call, the return value is merely live-out and there aren't
   6485   // any nodes in the DAG representing it. Return a special value to
   6486   // indicate that a tail call has been emitted and no more Instructions
   6487   // should be processed in the current block.
   6488   if (isTailCall) {
   6489     DAG.setRoot(Chain);
   6490     return std::make_pair(SDValue(), SDValue());
   6491   }
   6492 
   6493   DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
   6494           assert(InVals[i].getNode() &&
   6495                  "LowerCall emitted a null value!");
   6496           assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
   6497                  "LowerCall emitted a value with the wrong type!");
   6498         });
   6499 
   6500   // Collect the legal value parts into potentially illegal values
   6501   // that correspond to the original function's return values.
   6502   ISD::NodeType AssertOp = ISD::DELETED_NODE;
   6503   if (RetSExt)
   6504     AssertOp = ISD::AssertSext;
   6505   else if (RetZExt)
   6506     AssertOp = ISD::AssertZext;
   6507   SmallVector<SDValue, 4> ReturnValues;
   6508   unsigned CurReg = 0;
   6509   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
   6510     EVT VT = RetTys[I];
   6511     EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
   6512     unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
   6513 
   6514     ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
   6515                                             NumRegs, RegisterVT, VT,
   6516                                             AssertOp));
   6517     CurReg += NumRegs;
   6518   }
   6519 
   6520   // For a function returning void, there is no return value. We can't create
   6521   // such a node, so we just return a null return value in that case. In
   6522   // that case, nothing will actually look at the value.
   6523   if (ReturnValues.empty())
   6524     return std::make_pair(SDValue(), Chain);
   6525 
   6526   SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
   6527                             DAG.getVTList(&RetTys[0], RetTys.size()),
   6528                             &ReturnValues[0], ReturnValues.size());
   6529   return std::make_pair(Res, Chain);
   6530 }
   6531 
   6532 void TargetLowering::LowerOperationWrapper(SDNode *N,
   6533                                            SmallVectorImpl<SDValue> &Results,
   6534                                            SelectionDAG &DAG) const {
   6535   SDValue Res = LowerOperation(SDValue(N, 0), DAG);
   6536   if (Res.getNode())
   6537     Results.push_back(Res);
   6538 }
   6539 
   6540 SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   6541   llvm_unreachable("LowerOperation not implemented for this target!");
   6542 }
   6543 
   6544 void
   6545 SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
   6546   SDValue Op = getNonRegisterValue(V);
   6547   assert((Op.getOpcode() != ISD::CopyFromReg ||
   6548           cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
   6549          "Copy from a reg to the same reg!");
   6550   assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
   6551 
   6552   RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
   6553   SDValue Chain = DAG.getEntryNode();
   6554   RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
   6555   PendingExports.push_back(Chain);
   6556 }
   6557 
   6558 #include "llvm/CodeGen/SelectionDAGISel.h"
   6559 
   6560 /// isOnlyUsedInEntryBlock - If the specified argument is only used in the
   6561 /// entry block, return true.  This includes arguments used by switches, since
   6562 /// the switch may expand into multiple basic blocks.
   6563 static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
   6564   // With FastISel active, we may be splitting blocks, so force creation
   6565   // of virtual registers for all non-dead arguments.
   6566   if (FastISel)
   6567     return A->use_empty();
   6568 
   6569   const BasicBlock *Entry = A->getParent()->begin();
   6570   for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
   6571        UI != E; ++UI) {
   6572     const User *U = *UI;
   6573     if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
   6574       return false;  // Use not in entry block.
   6575   }
   6576   return true;
   6577 }
   6578 
   6579 void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
   6580   // If this is the entry block, emit arguments.
   6581   const Function &F = *LLVMBB->getParent();
   6582   SelectionDAG &DAG = SDB->DAG;
   6583   DebugLoc dl = SDB->getCurDebugLoc();
   6584   const TargetData *TD = TLI.getTargetData();
   6585   SmallVector<ISD::InputArg, 16> Ins;
   6586 
   6587   // Check whether the function can return without sret-demotion.
   6588   SmallVector<ISD::OutputArg, 4> Outs;
   6589   GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
   6590                 Outs, TLI);
   6591 
   6592   if (!FuncInfo->CanLowerReturn) {
   6593     // Put in an sret pointer parameter before all the other parameters.
   6594     SmallVector<EVT, 1> ValueVTs;
   6595     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
   6596 
   6597     // NOTE: Assuming that a pointer will never break down to more than one VT
   6598     // or one register.
   6599     ISD::ArgFlagsTy Flags;
   6600     Flags.setSRet();
   6601     EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
   6602     ISD::InputArg RetArg(Flags, RegisterVT, true);
   6603     Ins.push_back(RetArg);
   6604   }
   6605 
   6606   // Set up the incoming argument description vector.
   6607   unsigned Idx = 1;
   6608   for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
   6609        I != E; ++I, ++Idx) {
   6610     SmallVector<EVT, 4> ValueVTs;
   6611     ComputeValueVTs(TLI, I->getType(), ValueVTs);
   6612     bool isArgValueUsed = !I->use_empty();
   6613     for (unsigned Value = 0, NumValues = ValueVTs.size();
   6614          Value != NumValues; ++Value) {
   6615       EVT VT = ValueVTs[Value];
   6616       Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
   6617       ISD::ArgFlagsTy Flags;
   6618       unsigned OriginalAlignment =
   6619         TD->getABITypeAlignment(ArgTy);
   6620 
   6621       if (F.paramHasAttr(Idx, Attribute::ZExt))
   6622         Flags.setZExt();
   6623       if (F.paramHasAttr(Idx, Attribute::SExt))
   6624         Flags.setSExt();
   6625       if (F.paramHasAttr(Idx, Attribute::InReg))
   6626         Flags.setInReg();
   6627       if (F.paramHasAttr(Idx, Attribute::StructRet))
   6628         Flags.setSRet();
   6629       if (F.paramHasAttr(Idx, Attribute::ByVal)) {
   6630         Flags.setByVal();
   6631         PointerType *Ty = cast<PointerType>(I->getType());
   6632         Type *ElementTy = Ty->getElementType();
   6633         Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
   6634         // For ByVal, alignment should be passed from FE.  BE will guess if
   6635         // this info is not there but there are cases it cannot get right.
   6636         unsigned FrameAlign;
   6637         if (F.getParamAlignment(Idx))
   6638           FrameAlign = F.getParamAlignment(Idx);
   6639         else
   6640           FrameAlign = TLI.getByValTypeAlignment(ElementTy);
   6641         Flags.setByValAlign(FrameAlign);
   6642       }
   6643       if (F.paramHasAttr(Idx, Attribute::Nest))
   6644         Flags.setNest();
   6645       Flags.setOrigAlign(OriginalAlignment);
   6646 
   6647       EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
   6648       unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
   6649       for (unsigned i = 0; i != NumRegs; ++i) {
   6650         ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
   6651         if (NumRegs > 1 && i == 0)
   6652           MyFlags.Flags.setSplit();
   6653         // if it isn't first piece, alignment must be 1
   6654         else if (i > 0)
   6655           MyFlags.Flags.setOrigAlign(1);
   6656         Ins.push_back(MyFlags);
   6657       }
   6658     }
   6659   }
   6660 
   6661   // Call the target to set up the argument values.
   6662   SmallVector<SDValue, 8> InVals;
   6663   SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
   6664                                              F.isVarArg(), Ins,
   6665                                              dl, DAG, InVals);
   6666 
   6667   // Verify that the target's LowerFormalArguments behaved as expected.
   6668   assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
   6669          "LowerFormalArguments didn't return a valid chain!");
   6670   assert(InVals.size() == Ins.size() &&
   6671          "LowerFormalArguments didn't emit the correct number of values!");
   6672   DEBUG({
   6673       for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
   6674         assert(InVals[i].getNode() &&
   6675                "LowerFormalArguments emitted a null value!");
   6676         assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
   6677                "LowerFormalArguments emitted a value with the wrong type!");
   6678       }
   6679     });
   6680 
   6681   // Update the DAG with the new chain value resulting from argument lowering.
   6682   DAG.setRoot(NewRoot);
   6683 
   6684   // Set up the argument values.
   6685   unsigned i = 0;
   6686   Idx = 1;
   6687   if (!FuncInfo->CanLowerReturn) {
   6688     // Create a virtual register for the sret pointer, and put in a copy
   6689     // from the sret argument into it.
   6690     SmallVector<EVT, 1> ValueVTs;
   6691     ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
   6692     EVT VT = ValueVTs[0];
   6693     EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
   6694     ISD::NodeType AssertOp = ISD::DELETED_NODE;
   6695     SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
   6696                                         RegVT, VT, AssertOp);
   6697 
   6698     MachineFunction& MF = SDB->DAG.getMachineFunction();
   6699     MachineRegisterInfo& RegInfo = MF.getRegInfo();
   6700     unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
   6701     FuncInfo->DemoteRegister = SRetReg;
   6702     NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
   6703                                     SRetReg, ArgValue);
   6704     DAG.setRoot(NewRoot);
   6705 
   6706     // i indexes lowered arguments.  Bump it past the hidden sret argument.
   6707     // Idx indexes LLVM arguments.  Don't touch it.
   6708     ++i;
   6709   }
   6710 
   6711   for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
   6712       ++I, ++Idx) {
   6713     SmallVector<SDValue, 4> ArgValues;
   6714     SmallVector<EVT, 4> ValueVTs;
   6715     ComputeValueVTs(TLI, I->getType(), ValueVTs);
   6716     unsigned NumValues = ValueVTs.size();
   6717 
   6718     // If this argument is unused then remember its value. It is used to generate
   6719     // debugging information.
   6720     if (I->use_empty() && NumValues)
   6721       SDB->setUnusedArgValue(I, InVals[i]);
   6722 
   6723     for (unsigned Val = 0; Val != NumValues; ++Val) {
   6724       EVT VT = ValueVTs[Val];
   6725       EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
   6726       unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
   6727 
   6728       if (!I->use_empty()) {
   6729         ISD::NodeType AssertOp = ISD::DELETED_NODE;
   6730         if (F.paramHasAttr(Idx, Attribute::SExt))
   6731           AssertOp = ISD::AssertSext;
   6732         else if (F.paramHasAttr(Idx, Attribute::ZExt))
   6733           AssertOp = ISD::AssertZext;
   6734 
   6735         ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
   6736                                              NumParts, PartVT, VT,
   6737                                              AssertOp));
   6738       }
   6739 
   6740       i += NumParts;
   6741     }
   6742 
   6743     // We don't need to do anything else for unused arguments.
   6744     if (ArgValues.empty())
   6745       continue;
   6746 
   6747     // Note down frame index.
   6748     if (FrameIndexSDNode *FI =
   6749 	dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
   6750       FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
   6751 
   6752     SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
   6753                                      SDB->getCurDebugLoc());
   6754 
   6755     SDB->setValue(I, Res);
   6756     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
   6757       if (LoadSDNode *LNode =
   6758           dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
   6759         if (FrameIndexSDNode *FI =
   6760             dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
   6761         FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
   6762     }
   6763 
   6764     // If this argument is live outside of the entry block, insert a copy from
   6765     // wherever we got it to the vreg that other BB's will reference it as.
   6766     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
   6767       // If we can, though, try to skip creating an unnecessary vreg.
   6768       // FIXME: This isn't very clean... it would be nice to make this more
   6769       // general.  It's also subtly incompatible with the hacks FastISel
   6770       // uses with vregs.
   6771       unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
   6772       if (TargetRegisterInfo::isVirtualRegister(Reg)) {
   6773         FuncInfo->ValueMap[I] = Reg;
   6774         continue;
   6775       }
   6776     }
   6777     if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
   6778       FuncInfo->InitializeRegForValue(I);
   6779       SDB->CopyToExportRegsIfNeeded(I);
   6780     }
   6781   }
   6782 
   6783   assert(i == InVals.size() && "Argument register count mismatch!");
   6784 
   6785   // Finally, if the target has anything special to do, allow it to do so.
   6786   // FIXME: this should insert code into the DAG!
   6787   EmitFunctionEntryCode();
   6788 }
   6789 
   6790 /// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
   6791 /// ensure constants are generated when needed.  Remember the virtual registers
   6792 /// that need to be added to the Machine PHI nodes as input.  We cannot just
   6793 /// directly add them, because expansion might result in multiple MBB's for one
   6794 /// BB.  As such, the start of the BB might correspond to a different MBB than
   6795 /// the end.
   6796 ///
   6797 void
   6798 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
   6799   const TerminatorInst *TI = LLVMBB->getTerminator();
   6800 
   6801   SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
   6802 
   6803   // Check successor nodes' PHI nodes that expect a constant to be available
   6804   // from this block.
   6805   for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
   6806     const BasicBlock *SuccBB = TI->getSuccessor(succ);
   6807     if (!isa<PHINode>(SuccBB->begin())) continue;
   6808     MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
   6809 
   6810     // If this terminator has multiple identical successors (common for
   6811     // switches), only handle each succ once.
   6812     if (!SuccsHandled.insert(SuccMBB)) continue;
   6813 
   6814     MachineBasicBlock::iterator MBBI = SuccMBB->begin();
   6815 
   6816     // At this point we know that there is a 1-1 correspondence between LLVM PHI
   6817     // nodes and Machine PHI nodes, but the incoming operands have not been
   6818     // emitted yet.
   6819     for (BasicBlock::const_iterator I = SuccBB->begin();
   6820          const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
   6821       // Ignore dead phi's.
   6822       if (PN->use_empty()) continue;
   6823 
   6824       // Skip empty types
   6825       if (PN->getType()->isEmptyTy())
   6826         continue;
   6827 
   6828       unsigned Reg;
   6829       const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
   6830 
   6831       if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
   6832         unsigned &RegOut = ConstantsOut[C];
   6833         if (RegOut == 0) {
   6834           RegOut = FuncInfo.CreateRegs(C->getType());
   6835           CopyValueToVirtualRegister(C, RegOut);
   6836         }
   6837         Reg = RegOut;
   6838       } else {
   6839         DenseMap<const Value *, unsigned>::iterator I =
   6840           FuncInfo.ValueMap.find(PHIOp);
   6841         if (I != FuncInfo.ValueMap.end())
   6842           Reg = I->second;
   6843         else {
   6844           assert(isa<AllocaInst>(PHIOp) &&
   6845                  FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
   6846                  "Didn't codegen value into a register!??");
   6847           Reg = FuncInfo.CreateRegs(PHIOp->getType());
   6848           CopyValueToVirtualRegister(PHIOp, Reg);
   6849         }
   6850       }
   6851 
   6852       // Remember that this register needs to added to the machine PHI node as
   6853       // the input for this MBB.
   6854       SmallVector<EVT, 4> ValueVTs;
   6855       ComputeValueVTs(TLI, PN->getType(), ValueVTs);
   6856       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
   6857         EVT VT = ValueVTs[vti];
   6858         unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
   6859         for (unsigned i = 0, e = NumRegisters; i != e; ++i)
   6860           FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
   6861         Reg += NumRegisters;
   6862       }
   6863     }
   6864   }
   6865   ConstantsOut.clear();
   6866 }
   6867