Home | History | Annotate | Download | only in Hexagon
      1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the interfaces that Hexagon uses to lower LLVM code
     11 // into a selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "HexagonISelLowering.h"
     16 #include "Hexagon.h"
     17 #include "HexagonMachineFunctionInfo.h"
     18 #include "HexagonRegisterInfo.h"
     19 #include "HexagonSubtarget.h"
     20 #include "HexagonTargetMachine.h"
     21 #include "HexagonTargetObjectFile.h"
     22 #include "llvm/ADT/APInt.h"
     23 #include "llvm/ADT/ArrayRef.h"
     24 #include "llvm/ADT/SmallVector.h"
     25 #include "llvm/CodeGen/CallingConvLower.h"
     26 #include "llvm/CodeGen/MachineFrameInfo.h"
     27 #include "llvm/CodeGen/MachineFunction.h"
     28 #include "llvm/CodeGen/MachineMemOperand.h"
     29 #include "llvm/CodeGen/MachineRegisterInfo.h"
     30 #include "llvm/CodeGen/RuntimeLibcalls.h"
     31 #include "llvm/CodeGen/SelectionDAG.h"
     32 #include "llvm/CodeGen/TargetCallingConv.h"
     33 #include "llvm/CodeGen/ValueTypes.h"
     34 #include "llvm/IR/BasicBlock.h"
     35 #include "llvm/IR/CallingConv.h"
     36 #include "llvm/IR/DataLayout.h"
     37 #include "llvm/IR/DerivedTypes.h"
     38 #include "llvm/IR/Function.h"
     39 #include "llvm/IR/GlobalValue.h"
     40 #include "llvm/IR/InlineAsm.h"
     41 #include "llvm/IR/Instructions.h"
     42 #include "llvm/IR/Intrinsics.h"
     43 #include "llvm/IR/IntrinsicInst.h"
     44 #include "llvm/IR/Module.h"
     45 #include "llvm/IR/Type.h"
     46 #include "llvm/IR/Value.h"
     47 #include "llvm/MC/MCRegisterInfo.h"
     48 #include "llvm/Support/Casting.h"
     49 #include "llvm/Support/CodeGen.h"
     50 #include "llvm/Support/CommandLine.h"
     51 #include "llvm/Support/Debug.h"
     52 #include "llvm/Support/ErrorHandling.h"
     53 #include "llvm/Support/MathExtras.h"
     54 #include "llvm/Support/raw_ostream.h"
     55 #include "llvm/Target/TargetMachine.h"
     56 #include <algorithm>
     57 #include <cassert>
     58 #include <cstddef>
     59 #include <cstdint>
     60 #include <limits>
     61 #include <utility>
     62 
     63 using namespace llvm;
     64 
     65 #define DEBUG_TYPE "hexagon-lowering"
     66 
     67 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
     68   cl::init(true), cl::Hidden,
     69   cl::desc("Control jump table emission on Hexagon target"));
     70 
     71 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
     72   cl::Hidden, cl::ZeroOrMore, cl::init(false),
     73   cl::desc("Enable Hexagon SDNode scheduling"));
     74 
     75 static cl::opt<bool> EnableFastMath("ffast-math",
     76   cl::Hidden, cl::ZeroOrMore, cl::init(false),
     77   cl::desc("Enable Fast Math processing"));
     78 
     79 static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
     80   cl::Hidden, cl::ZeroOrMore, cl::init(5),
     81   cl::desc("Set minimum jump tables"));
     82 
     83 static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
     84   cl::Hidden, cl::ZeroOrMore, cl::init(6),
     85   cl::desc("Max #stores to inline memcpy"));
     86 
     87 static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
     88   cl::Hidden, cl::ZeroOrMore, cl::init(4),
     89   cl::desc("Max #stores to inline memcpy"));
     90 
     91 static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
     92   cl::Hidden, cl::ZeroOrMore, cl::init(6),
     93   cl::desc("Max #stores to inline memmove"));
     94 
     95 static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
     96   cl::Hidden, cl::ZeroOrMore, cl::init(4),
     97   cl::desc("Max #stores to inline memmove"));
     98 
     99 static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
    100   cl::Hidden, cl::ZeroOrMore, cl::init(8),
    101   cl::desc("Max #stores to inline memset"));
    102 
    103 static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
    104   cl::Hidden, cl::ZeroOrMore, cl::init(4),
    105   cl::desc("Max #stores to inline memset"));
    106 
    107 static cl::opt<bool> AlignLoads("hexagon-align-loads",
    108   cl::Hidden, cl::init(false),
    109   cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
    110 
    111 
    112 namespace {
    113 
    114   class HexagonCCState : public CCState {
    115     unsigned NumNamedVarArgParams = 0;
    116 
    117   public:
    118     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
    119                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
    120                    unsigned NumNamedArgs)
    121         : CCState(CC, IsVarArg, MF, locs, C),
    122           NumNamedVarArgParams(NumNamedArgs) {}
    123     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
    124   };
    125 
    126 } // end anonymous namespace
    127 
    128 
    129 // Implement calling convention for Hexagon.
    130 
    131 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
    132                        CCValAssign::LocInfo &LocInfo,
    133                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
    134   static const MCPhysReg ArgRegs[] = {
    135     Hexagon::R0, Hexagon::R1, Hexagon::R2,
    136     Hexagon::R3, Hexagon::R4, Hexagon::R5
    137   };
    138   const unsigned NumArgRegs = array_lengthof(ArgRegs);
    139   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
    140 
    141   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
    142   if (RegNum != NumArgRegs && RegNum % 2 == 1)
    143     State.AllocateReg(ArgRegs[RegNum]);
    144 
    145   // Always return false here, as this function only makes sure that the first
    146   // unallocated register has an even register number and does not actually
    147   // allocate a register for the current argument.
    148   return false;
    149 }
    150 
    151 #include "HexagonGenCallingConv.inc"
    152 
    153 
    154 void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
    155   if (VT != PromotedLdStVT) {
    156     setOperationAction(ISD::LOAD, VT, Promote);
    157     AddPromotedToType(ISD::LOAD, VT, PromotedLdStVT);
    158 
    159     setOperationAction(ISD::STORE, VT, Promote);
    160     AddPromotedToType(ISD::STORE, VT, PromotedLdStVT);
    161   }
    162 }
    163 
    164 SDValue
    165 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
    166       const {
    167   return SDValue();
    168 }
    169 
    170 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
    171 /// by "Src" to address "Dst" of size "Size".  Alignment information is
    172 /// specified by the specific parameter attribute. The copy will be passed as
    173 /// a byval function parameter.  Sometimes what we are copying is the end of a
    174 /// larger object, the part that does not fit in registers.
    175 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
    176                                          SDValue Chain, ISD::ArgFlagsTy Flags,
    177                                          SelectionDAG &DAG, const SDLoc &dl) {
    178   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
    179   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
    180                        /*isVolatile=*/false, /*AlwaysInline=*/false,
    181                        /*isTailCall=*/false,
    182                        MachinePointerInfo(), MachinePointerInfo());
    183 }
    184 
    185 bool
    186 HexagonTargetLowering::CanLowerReturn(
    187     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    188     const SmallVectorImpl<ISD::OutputArg> &Outs,
    189     LLVMContext &Context) const {
    190   SmallVector<CCValAssign, 16> RVLocs;
    191   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
    192 
    193   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    194     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
    195   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
    196 }
    197 
    198 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
    199 // passed by value, the function prototype is modified to return void and
    200 // the value is stored in memory pointed by a pointer passed by caller.
    201 SDValue
    202 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
    203                                    bool IsVarArg,
    204                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
    205                                    const SmallVectorImpl<SDValue> &OutVals,
    206                                    const SDLoc &dl, SelectionDAG &DAG) const {
    207   // CCValAssign - represent the assignment of the return value to locations.
    208   SmallVector<CCValAssign, 16> RVLocs;
    209 
    210   // CCState - Info about the registers and stack slot.
    211   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
    212                  *DAG.getContext());
    213 
    214   // Analyze return values of ISD::RET
    215   if (Subtarget.useHVXOps())
    216     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
    217   else
    218     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
    219 
    220   SDValue Flag;
    221   SmallVector<SDValue, 4> RetOps(1, Chain);
    222 
    223   // Copy the result values into the output registers.
    224   for (unsigned i = 0; i != RVLocs.size(); ++i) {
    225     CCValAssign &VA = RVLocs[i];
    226 
    227     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
    228 
    229     // Guarantee that all emitted copies are stuck together with flags.
    230     Flag = Chain.getValue(1);
    231     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    232   }
    233 
    234   RetOps[0] = Chain;  // Update chain.
    235 
    236   // Add the flag if we have it.
    237   if (Flag.getNode())
    238     RetOps.push_back(Flag);
    239 
    240   return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
    241 }
    242 
    243 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
    244   // If either no tail call or told not to tail call at all, don't.
    245   auto Attr =
    246       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
    247   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
    248     return false;
    249 
    250   return true;
    251 }
    252 
    253 /// LowerCallResult - Lower the result values of an ISD::CALL into the
    254 /// appropriate copies out of appropriate physical registers.  This assumes that
    255 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
    256 /// being lowered. Returns a SDNode with the same number of values as the
    257 /// ISD::CALL.
    258 SDValue HexagonTargetLowering::LowerCallResult(
    259     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    260     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    261     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    262     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
    263   // Assign locations to each value returned by this call.
    264   SmallVector<CCValAssign, 16> RVLocs;
    265 
    266   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
    267                  *DAG.getContext());
    268 
    269   if (Subtarget.useHVXOps())
    270     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
    271   else
    272     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
    273 
    274   // Copy all of the result registers out of their specified physreg.
    275   for (unsigned i = 0; i != RVLocs.size(); ++i) {
    276     SDValue RetVal;
    277     if (RVLocs[i].getValVT() == MVT::i1) {
    278       // Return values of type MVT::i1 require special handling. The reason
    279       // is that MVT::i1 is associated with the PredRegs register class, but
    280       // values of that type are still returned in R0. Generate an explicit
    281       // copy into a predicate register from R0, and treat the value of the
    282       // predicate register as the call result.
    283       auto &MRI = DAG.getMachineFunction().getRegInfo();
    284       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
    285                                        MVT::i32, Glue);
    286       // FR0 = (Value, Chain, Glue)
    287       unsigned PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    288       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
    289                                      FR0.getValue(0), FR0.getValue(2));
    290       // TPR = (Chain, Glue)
    291       // Don't glue this CopyFromReg, because it copies from a virtual
    292       // register. If it is glued to the call, InstrEmitter will add it
    293       // as an implicit def to the call (EmitMachineNode).
    294       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
    295       Glue = TPR.getValue(1);
    296       Chain = TPR.getValue(0);
    297     } else {
    298       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
    299                                   RVLocs[i].getValVT(), Glue);
    300       Glue = RetVal.getValue(2);
    301       Chain = RetVal.getValue(1);
    302     }
    303     InVals.push_back(RetVal.getValue(0));
    304   }
    305 
    306   return Chain;
    307 }
    308 
    309 /// LowerCall - Functions arguments are copied from virtual regs to
    310 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
    311 SDValue
    312 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    313                                  SmallVectorImpl<SDValue> &InVals) const {
    314   SelectionDAG &DAG                     = CLI.DAG;
    315   SDLoc &dl                             = CLI.DL;
    316   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
    317   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
    318   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
    319   SDValue Chain                         = CLI.Chain;
    320   SDValue Callee                        = CLI.Callee;
    321   CallingConv::ID CallConv              = CLI.CallConv;
    322   bool IsVarArg                         = CLI.IsVarArg;
    323   bool DoesNotReturn                    = CLI.DoesNotReturn;
    324 
    325   bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
    326   MachineFunction &MF = DAG.getMachineFunction();
    327   MachineFrameInfo &MFI = MF.getFrameInfo();
    328   auto PtrVT = getPointerTy(MF.getDataLayout());
    329 
    330   unsigned NumParams = CLI.CS.getInstruction()
    331                         ? CLI.CS.getFunctionType()->getNumParams()
    332                         : 0;
    333   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    334     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
    335 
    336   // Analyze operands of the call, assigning locations to each operand.
    337   SmallVector<CCValAssign, 16> ArgLocs;
    338   HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
    339                         NumParams);
    340 
    341   if (Subtarget.useHVXOps())
    342     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
    343   else
    344     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
    345 
    346   auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
    347   if (Attr.getValueAsString() == "true")
    348     CLI.IsTailCall = false;
    349 
    350   if (CLI.IsTailCall) {
    351     bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    352     CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
    353                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
    354                         OutVals, Ins, DAG);
    355     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    356       CCValAssign &VA = ArgLocs[i];
    357       if (VA.isMemLoc()) {
    358         CLI.IsTailCall = false;
    359         break;
    360       }
    361     }
    362     LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
    363                                          : "Argument must be passed on stack. "
    364                                            "Not eligible for Tail Call\n"));
    365   }
    366   // Get a count of how many bytes are to be pushed on the stack.
    367   unsigned NumBytes = CCInfo.getNextStackOffset();
    368   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
    369   SmallVector<SDValue, 8> MemOpChains;
    370 
    371   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    372   SDValue StackPtr =
    373       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
    374 
    375   bool NeedsArgAlign = false;
    376   unsigned LargestAlignSeen = 0;
    377   // Walk the register/memloc assignments, inserting copies/loads.
    378   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    379     CCValAssign &VA = ArgLocs[i];
    380     SDValue Arg = OutVals[i];
    381     ISD::ArgFlagsTy Flags = Outs[i].Flags;
    382     // Record if we need > 8 byte alignment on an argument.
    383     bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    384     NeedsArgAlign |= ArgAlign;
    385 
    386     // Promote the value if needed.
    387     switch (VA.getLocInfo()) {
    388       default:
    389         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
    390         llvm_unreachable("Unknown loc info!");
    391       case CCValAssign::Full:
    392         break;
    393       case CCValAssign::BCvt:
    394         Arg = DAG.getBitcast(VA.getLocVT(), Arg);
    395         break;
    396       case CCValAssign::SExt:
    397         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
    398         break;
    399       case CCValAssign::ZExt:
    400         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
    401         break;
    402       case CCValAssign::AExt:
    403         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
    404         break;
    405     }
    406 
    407     if (VA.isMemLoc()) {
    408       unsigned LocMemOffset = VA.getLocMemOffset();
    409       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
    410                                         StackPtr.getValueType());
    411       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
    412       if (ArgAlign)
    413         LargestAlignSeen = std::max(LargestAlignSeen,
    414                                     VA.getLocVT().getStoreSizeInBits() >> 3);
    415       if (Flags.isByVal()) {
    416         // The argument is a struct passed by value. According to LLVM, "Arg"
    417         // is a pointer.
    418         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
    419                                                         Flags, DAG, dl));
    420       } else {
    421         MachinePointerInfo LocPI = MachinePointerInfo::getStack(
    422             DAG.getMachineFunction(), LocMemOffset);
    423         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
    424         MemOpChains.push_back(S);
    425       }
    426       continue;
    427     }
    428 
    429     // Arguments that can be passed on register must be kept at RegsToPass
    430     // vector.
    431     if (VA.isRegLoc())
    432       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    433   }
    434 
    435   if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    436     LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    437     unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
    438     LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    439     MFI.ensureMaxAlignment(LargestAlignSeen);
    440   }
    441   // Transform all store nodes into one single node because all store
    442   // nodes are independent of each other.
    443   if (!MemOpChains.empty())
    444     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
    445 
    446   SDValue Glue;
    447   if (!CLI.IsTailCall) {
    448     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    449     Glue = Chain.getValue(1);
    450   }
    451 
    452   // Build a sequence of copy-to-reg nodes chained together with token
    453   // chain and flag operands which copy the outgoing args into registers.
    454   // The Glue is necessary since all emitted instructions must be
    455   // stuck together.
    456   if (!CLI.IsTailCall) {
    457     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    458       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
    459                                RegsToPass[i].second, Glue);
    460       Glue = Chain.getValue(1);
    461     }
    462   } else {
    463     // For tail calls lower the arguments to the 'real' stack slot.
    464     //
    465     // Force all the incoming stack arguments to be loaded from the stack
    466     // before any new outgoing arguments are stored to the stack, because the
    467     // outgoing stack slots may alias the incoming argument stack slots, and
    468     // the alias isn't otherwise explicit. This is slightly more conservative
    469     // than necessary, because it means that each store effectively depends
    470     // on every argument instead of just those arguments it would clobber.
    471     //
    472     // Do not flag preceding copytoreg stuff together with the following stuff.
    473     Glue = SDValue();
    474     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    475       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
    476                                RegsToPass[i].second, Glue);
    477       Glue = Chain.getValue(1);
    478     }
    479     Glue = SDValue();
    480   }
    481 
    482   bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
    483   unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
    484 
    485   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
    486   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
    487   // node so that legalize doesn't hack it.
    488   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    489     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
    490   } else if (ExternalSymbolSDNode *S =
    491              dyn_cast<ExternalSymbolSDNode>(Callee)) {
    492     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
    493   }
    494 
    495   // Returns a chain & a flag for retval copy to use.
    496   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    497   SmallVector<SDValue, 8> Ops;
    498   Ops.push_back(Chain);
    499   Ops.push_back(Callee);
    500 
    501   // Add argument registers to the end of the list so that they are
    502   // known live into the call.
    503   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    504     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
    505                                   RegsToPass[i].second.getValueType()));
    506   }
    507 
    508   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
    509   assert(Mask && "Missing call preserved mask for calling convention");
    510   Ops.push_back(DAG.getRegisterMask(Mask));
    511 
    512   if (Glue.getNode())
    513     Ops.push_back(Glue);
    514 
    515   if (CLI.IsTailCall) {
    516     MFI.setHasTailCall();
    517     return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
    518   }
    519 
    520   // Set this here because we need to know this for "hasFP" in frame lowering.
    521   // The target-independent code calls getFrameRegister before setting it, and
    522   // getFrameRegister uses hasFP to determine whether the function has FP.
    523   MFI.setHasCalls(true);
    524 
    525   unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
    526   Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
    527   Glue = Chain.getValue(1);
    528 
    529   // Create the CALLSEQ_END node.
    530   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
    531                              DAG.getIntPtrConstant(0, dl, true), Glue, dl);
    532   Glue = Chain.getValue(1);
    533 
    534   // Handle result values, copying them out of physregs into vregs that we
    535   // return.
    536   return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
    537                          InVals, OutVals, Callee);
    538 }
    539 
    540 /// Returns true by value, base pointer and offset pointer and addressing
    541 /// mode by reference if this node can be combined with a load / store to
    542 /// form a post-indexed load / store.
    543 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
    544       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
    545       SelectionDAG &DAG) const {
    546   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
    547   if (!LSN)
    548     return false;
    549   EVT VT = LSN->getMemoryVT();
    550   if (!VT.isSimple())
    551     return false;
    552   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
    553                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
    554                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
    555                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
    556                      Subtarget.isHVXVectorType(VT.getSimpleVT());
    557   if (!IsLegalType)
    558     return false;
    559 
    560   if (Op->getOpcode() != ISD::ADD)
    561     return false;
    562   Base = Op->getOperand(0);
    563   Offset = Op->getOperand(1);
    564   if (!isa<ConstantSDNode>(Offset.getNode()))
    565     return false;
    566   AM = ISD::POST_INC;
    567 
    568   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
    569   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
    570 }
    571 
    572 SDValue
    573 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
    574   MachineFunction &MF = DAG.getMachineFunction();
    575   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
    576   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    577   unsigned LR = HRI.getRARegister();
    578 
    579   if (Op.getOpcode() != ISD::INLINEASM || HMFI.hasClobberLR())
    580     return Op;
    581 
    582   unsigned NumOps = Op.getNumOperands();
    583   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    584     --NumOps;  // Ignore the flag operand.
    585 
    586   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    587     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    588     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    589     ++i;  // Skip the ID value.
    590 
    591     switch (InlineAsm::getKind(Flags)) {
    592       default:
    593         llvm_unreachable("Bad flags!");
    594       case InlineAsm::Kind_RegUse:
    595       case InlineAsm::Kind_Imm:
    596       case InlineAsm::Kind_Mem:
    597         i += NumVals;
    598         break;
    599       case InlineAsm::Kind_Clobber:
    600       case InlineAsm::Kind_RegDef:
    601       case InlineAsm::Kind_RegDefEarlyClobber: {
    602         for (; NumVals; --NumVals, ++i) {
    603           unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
    604           if (Reg != LR)
    605             continue;
    606           HMFI.setHasClobberLR(true);
    607           return Op;
    608         }
    609         break;
    610       }
    611     }
    612   }
    613 
    614   return Op;
    615 }
    616 
    617 // Need to transform ISD::PREFETCH into something that doesn't inherit
    618 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
    619 // SDNPMayStore.
    620 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
    621                                              SelectionDAG &DAG) const {
    622   SDValue Chain = Op.getOperand(0);
    623   SDValue Addr = Op.getOperand(1);
    624   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
    625   // if the "reg" is fed by an "add".
    626   SDLoc DL(Op);
    627   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    628   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
    629 }
    630 
    631 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
    632 // is marked as having side-effects, while the register read on Hexagon does
    633 // not have any. TableGen refuses to accept the direct pattern from that node
    634 // to the A4_tfrcpp.
    635 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
    636                                                      SelectionDAG &DAG) const {
    637   SDValue Chain = Op.getOperand(0);
    638   SDLoc dl(Op);
    639   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    640   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
    641 }
    642 
    643 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
    644       SelectionDAG &DAG) const {
    645   SDValue Chain = Op.getOperand(0);
    646   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    647   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
    648   if (IntNo == Intrinsic::hexagon_prefetch) {
    649     SDValue Addr = Op.getOperand(2);
    650     SDLoc DL(Op);
    651     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    652     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
    653   }
    654   return SDValue();
    655 }
    656 
    657 SDValue
    658 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    659                                                SelectionDAG &DAG) const {
    660   SDValue Chain = Op.getOperand(0);
    661   SDValue Size = Op.getOperand(1);
    662   SDValue Align = Op.getOperand(2);
    663   SDLoc dl(Op);
    664 
    665   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
    666   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
    667 
    668   unsigned A = AlignConst->getSExtValue();
    669   auto &HFI = *Subtarget.getFrameLowering();
    670   // "Zero" means natural stack alignment.
    671   if (A == 0)
    672     A = HFI.getStackAlignment();
    673 
    674   LLVM_DEBUG({
    675     dbgs () << __func__ << " Align: " << A << " Size: ";
    676     Size.getNode()->dump(&DAG);
    677     dbgs() << "\n";
    678   });
    679 
    680   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
    681   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    682   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
    683 
    684   DAG.ReplaceAllUsesOfValueWith(Op, AA);
    685   return AA;
    686 }
    687 
    688 SDValue HexagonTargetLowering::LowerFormalArguments(
    689     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    690     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    691     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
    692   MachineFunction &MF = DAG.getMachineFunction();
    693   MachineFrameInfo &MFI = MF.getFrameInfo();
    694   MachineRegisterInfo &MRI = MF.getRegInfo();
    695 
    696   // Assign locations to all of the incoming arguments.
    697   SmallVector<CCValAssign, 16> ArgLocs;
    698   HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
    699                         MF.getFunction().getFunctionType()->getNumParams());
    700 
    701   if (Subtarget.useHVXOps())
    702     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
    703   else
    704     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
    705 
    706   // For LLVM, in the case when returning a struct by value (>8byte),
    707   // the first argument is a pointer that points to the location on caller's
    708   // stack where the return value will be stored. For Hexagon, the location on
    709   // caller's stack is passed only when the struct size is smaller than (and
    710   // equal to) 8 bytes. If not, no address will be passed into callee and
    711   // callee return the result direclty through R0/R1.
    712 
    713   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
    714 
    715   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    716     CCValAssign &VA = ArgLocs[i];
    717     ISD::ArgFlagsTy Flags = Ins[i].Flags;
    718     bool ByVal = Flags.isByVal();
    719 
    720     // Arguments passed in registers:
    721     // 1. 32- and 64-bit values and HVX vectors are passed directly,
    722     // 2. Large structs are passed via an address, and the address is
    723     //    passed in a register.
    724     if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
    725       llvm_unreachable("ByValSize must be bigger than 8 bytes");
    726 
    727     bool InReg = VA.isRegLoc() &&
    728                  (!ByVal || (ByVal && Flags.getByValSize() > 8));
    729 
    730     if (InReg) {
    731       MVT RegVT = VA.getLocVT();
    732       if (VA.getLocInfo() == CCValAssign::BCvt)
    733         RegVT = VA.getValVT();
    734 
    735       const TargetRegisterClass *RC = getRegClassFor(RegVT);
    736       unsigned VReg = MRI.createVirtualRegister(RC);
    737       SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
    738 
    739       // Treat values of type MVT::i1 specially: they are passed in
    740       // registers of type i32, but they need to remain as values of
    741       // type i1 for consistency of the argument lowering.
    742       if (VA.getValVT() == MVT::i1) {
    743         assert(RegVT.getSizeInBits() <= 32);
    744         SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
    745                                 Copy, DAG.getConstant(1, dl, RegVT));
    746         Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
    747                             ISD::SETNE);
    748       } else {
    749 #ifndef NDEBUG
    750         unsigned RegSize = RegVT.getSizeInBits();
    751         assert(RegSize == 32 || RegSize == 64 ||
    752                Subtarget.isHVXVectorType(RegVT));
    753 #endif
    754       }
    755       InVals.push_back(Copy);
    756       MRI.addLiveIn(VA.getLocReg(), VReg);
    757     } else {
    758       assert(VA.isMemLoc() && "Argument should be passed in memory");
    759 
    760       // If it's a byval parameter, then we need to compute the
    761       // "real" size, not the size of the pointer.
    762       unsigned ObjSize = Flags.isByVal()
    763                             ? Flags.getByValSize()
    764                             : VA.getLocVT().getStoreSizeInBits() / 8;
    765 
    766       // Create the frame index object for this incoming parameter.
    767       int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
    768       int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
    769       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    770 
    771       if (Flags.isByVal()) {
    772         // If it's a pass-by-value aggregate, then do not dereference the stack
    773         // location. Instead, we should generate a reference to the stack
    774         // location.
    775         InVals.push_back(FIN);
    776       } else {
    777         SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
    778                                 MachinePointerInfo::getFixedStack(MF, FI, 0));
    779         InVals.push_back(L);
    780       }
    781     }
    782   }
    783 
    784 
    785   if (IsVarArg) {
    786     // This will point to the next argument passed via stack.
    787     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    788     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    789     HMFI.setVarArgsFrameIndex(FI);
    790   }
    791 
    792   return Chain;
    793 }
    794 
    795 SDValue
    796 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
    797   // VASTART stores the address of the VarArgsFrameIndex slot into the
    798   // memory location argument.
    799   MachineFunction &MF = DAG.getMachineFunction();
    800   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
    801   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
    802   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    803   return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
    804                       MachinePointerInfo(SV));
    805 }
    806 
    807 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
    808   const SDLoc &dl(Op);
    809   SDValue LHS = Op.getOperand(0);
    810   SDValue RHS = Op.getOperand(1);
    811   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    812   MVT ResTy = ty(Op);
    813   MVT OpTy = ty(LHS);
    814 
    815   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    816     MVT ElemTy = OpTy.getVectorElementType();
    817     assert(ElemTy.isScalarInteger());
    818     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
    819                                   OpTy.getVectorNumElements());
    820     return DAG.getSetCC(dl, ResTy,
    821                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
    822                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
    823   }
    824 
    825   // Treat all other vector types as legal.
    826   if (ResTy.isVector())
    827     return Op;
    828 
    829   // Comparisons of short integers should use sign-extend, not zero-extend,
    830   // since we can represent small negative values in the compare instructions.
    831   // The LLVM default is to use zero-extend arbitrarily in these cases.
    832   auto isSExtFree = [this](SDValue N) {
    833     switch (N.getOpcode()) {
    834       case ISD::TRUNCATE: {
    835         // A sign-extend of a truncate of a sign-extend is free.
    836         SDValue Op = N.getOperand(0);
    837         if (Op.getOpcode() != ISD::AssertSext)
    838           return false;
    839         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
    840         unsigned ThisBW = ty(N).getSizeInBits();
    841         unsigned OrigBW = OrigTy.getSizeInBits();
    842         // The type that was sign-extended to get the AssertSext must be
    843         // narrower than the type of N (so that N has still the same value
    844         // as the original).
    845         return ThisBW >= OrigBW;
    846       }
    847       case ISD::LOAD:
    848         // We have sign-extended loads.
    849         return true;
    850     }
    851     return false;
    852   };
    853 
    854   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
    855     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    856     bool IsNegative = C && C->getAPIntValue().isNegative();
    857     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
    858       return DAG.getSetCC(dl, ResTy,
    859                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
    860                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
    861   }
    862 
    863   return SDValue();
    864 }
    865 
    866 SDValue
    867 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
    868   SDValue PredOp = Op.getOperand(0);
    869   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
    870   EVT OpVT = Op1.getValueType();
    871   SDLoc DL(Op);
    872 
    873   if (OpVT == MVT::v2i16) {
    874     SDValue X1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op1);
    875     SDValue X2 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i32, Op2);
    876     SDValue SL = DAG.getNode(ISD::VSELECT, DL, MVT::v2i32, PredOp, X1, X2);
    877     SDValue TR = DAG.getNode(ISD::TRUNCATE, DL, MVT::v2i16, SL);
    878     return TR;
    879   }
    880 
    881   return SDValue();
    882 }
    883 
    884 static Constant *convert_i1_to_i8(const Constant *ConstVal) {
    885   SmallVector<Constant *, 128> NewConst;
    886   const ConstantVector *CV = dyn_cast<ConstantVector>(ConstVal);
    887   if (!CV)
    888     return nullptr;
    889 
    890   LLVMContext &Ctx = ConstVal->getContext();
    891   IRBuilder<> IRB(Ctx);
    892   unsigned NumVectorElements = CV->getNumOperands();
    893   assert(isPowerOf2_32(NumVectorElements) &&
    894          "conversion only supported for pow2 VectorSize!");
    895 
    896   for (unsigned i = 0; i < NumVectorElements / 8; ++i) {
    897     uint8_t x = 0;
    898     for (unsigned j = 0; j < 8; ++j) {
    899       uint8_t y = CV->getOperand(i * 8 + j)->getUniqueInteger().getZExtValue();
    900       x |= y << (7 - j);
    901     }
    902     assert((x == 0 || x == 255) && "Either all 0's or all 1's expected!");
    903     NewConst.push_back(IRB.getInt8(x));
    904   }
    905   return ConstantVector::get(NewConst);
    906 }
    907 
    908 SDValue
    909 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
    910   EVT ValTy = Op.getValueType();
    911   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
    912   Constant *CVal = nullptr;
    913   bool isVTi1Type = false;
    914   if (const Constant *ConstVal = dyn_cast<Constant>(CPN->getConstVal())) {
    915     Type *CValTy = ConstVal->getType();
    916     if (CValTy->isVectorTy() &&
    917         CValTy->getVectorElementType()->isIntegerTy(1)) {
    918       CVal = convert_i1_to_i8(ConstVal);
    919       isVTi1Type = (CVal != nullptr);
    920     }
    921   }
    922   unsigned Align = CPN->getAlignment();
    923   bool IsPositionIndependent = isPositionIndependent();
    924   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
    925 
    926   unsigned Offset = 0;
    927   SDValue T;
    928   if (CPN->isMachineConstantPoolEntry())
    929     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
    930                                   TF);
    931   else if (isVTi1Type)
    932     T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
    933   else
    934     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
    935 
    936   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
    937          "Inconsistent target flag encountered");
    938 
    939   if (IsPositionIndependent)
    940     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
    941   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
    942 }
    943 
    944 SDValue
    945 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
    946   EVT VT = Op.getValueType();
    947   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
    948   if (isPositionIndependent()) {
    949     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    950     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
    951   }
    952 
    953   SDValue T = DAG.getTargetJumpTable(Idx, VT);
    954   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
    955 }
    956 
    957 SDValue
    958 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
    959   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    960   MachineFunction &MF = DAG.getMachineFunction();
    961   MachineFrameInfo &MFI = MF.getFrameInfo();
    962   MFI.setReturnAddressIsTaken(true);
    963 
    964   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    965     return SDValue();
    966 
    967   EVT VT = Op.getValueType();
    968   SDLoc dl(Op);
    969   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    970   if (Depth) {
    971     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    972     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    973     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
    974                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
    975                        MachinePointerInfo());
    976   }
    977 
    978   // Return LR, which contains the return address. Mark it an implicit live-in.
    979   unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
    980   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
    981 }
    982 
    983 SDValue
    984 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
    985   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    986   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    987   MFI.setFrameAddressIsTaken(true);
    988 
    989   EVT VT = Op.getValueType();
    990   SDLoc dl(Op);
    991   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    992   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
    993                                          HRI.getFrameRegister(), VT);
    994   while (Depth--)
    995     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
    996                             MachinePointerInfo());
    997   return FrameAddr;
    998 }
    999 
   1000 SDValue
   1001 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
   1002   SDLoc dl(Op);
   1003   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
   1004 }
   1005 
   1006 SDValue
   1007 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
   1008   SDLoc dl(Op);
   1009   auto *GAN = cast<GlobalAddressSDNode>(Op);
   1010   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1011   auto *GV = GAN->getGlobal();
   1012   int64_t Offset = GAN->getOffset();
   1013 
   1014   auto &HLOF = *HTM.getObjFileLowering();
   1015   Reloc::Model RM = HTM.getRelocationModel();
   1016 
   1017   if (RM == Reloc::Static) {
   1018     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
   1019     const GlobalObject *GO = GV->getBaseObject();
   1020     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
   1021       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
   1022     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
   1023   }
   1024 
   1025   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
   1026   if (UsePCRel) {
   1027     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
   1028                                             HexagonII::MO_PCREL);
   1029     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
   1030   }
   1031 
   1032   // Use GOT index.
   1033   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   1034   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
   1035   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
   1036   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
   1037 }
   1038 
   1039 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
   1040 SDValue
   1041 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   1042   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   1043   SDLoc dl(Op);
   1044   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   1045 
   1046   Reloc::Model RM = HTM.getRelocationModel();
   1047   if (RM == Reloc::Static) {
   1048     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
   1049     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
   1050   }
   1051 
   1052   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
   1053   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
   1054 }
   1055 
   1056 SDValue
   1057 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
   1058       const {
   1059   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   1060   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
   1061                                                HexagonII::MO_PCREL);
   1062   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
   1063 }
   1064 
   1065 SDValue
   1066 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
   1067       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
   1068       unsigned char OperandFlags) const {
   1069   MachineFunction &MF = DAG.getMachineFunction();
   1070   MachineFrameInfo &MFI = MF.getFrameInfo();
   1071   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1072   SDLoc dl(GA);
   1073   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
   1074                                            GA->getValueType(0),
   1075                                            GA->getOffset(),
   1076                                            OperandFlags);
   1077   // Create Operands for the call.The Operands should have the following:
   1078   // 1. Chain SDValue
   1079   // 2. Callee which in this case is the Global address value.
   1080   // 3. Registers live into the call.In this case its R0, as we
   1081   //    have just one argument to be passed.
   1082   // 4. Glue.
   1083   // Note: The order is important.
   1084 
   1085   const auto &HRI = *Subtarget.getRegisterInfo();
   1086   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
   1087   assert(Mask && "Missing call preserved mask for calling convention");
   1088   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
   1089                     DAG.getRegisterMask(Mask), Glue };
   1090   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
   1091 
   1092   // Inform MFI that function has calls.
   1093   MFI.setAdjustsStack(true);
   1094 
   1095   Glue = Chain.getValue(1);
   1096   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
   1097 }
   1098 
   1099 //
   1100 // Lower using the intial executable model for TLS addresses
   1101 //
   1102 SDValue
   1103 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
   1104       SelectionDAG &DAG) const {
   1105   SDLoc dl(GA);
   1106   int64_t Offset = GA->getOffset();
   1107   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1108 
   1109   // Get the thread pointer.
   1110   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
   1111 
   1112   bool IsPositionIndependent = isPositionIndependent();
   1113   unsigned char TF =
   1114       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
   1115 
   1116   // First generate the TLS symbol address
   1117   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
   1118                                            Offset, TF);
   1119 
   1120   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1121 
   1122   if (IsPositionIndependent) {
   1123     // Generate the GOT pointer in case of position independent code
   1124     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
   1125 
   1126     // Add the TLS Symbol address to GOT pointer.This gives
   1127     // GOT relative relocation for the symbol.
   1128     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
   1129   }
   1130 
   1131   // Load the offset value for TLS symbol.This offset is relative to
   1132   // thread pointer.
   1133   SDValue LoadOffset =
   1134       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
   1135 
   1136   // Address of the thread local variable is the add of thread
   1137   // pointer and the offset of the variable.
   1138   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
   1139 }
   1140 
   1141 //
   1142 // Lower using the local executable model for TLS addresses
   1143 //
   1144 SDValue
   1145 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
   1146       SelectionDAG &DAG) const {
   1147   SDLoc dl(GA);
   1148   int64_t Offset = GA->getOffset();
   1149   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1150 
   1151   // Get the thread pointer.
   1152   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
   1153   // Generate the TLS symbol address
   1154   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
   1155                                            HexagonII::MO_TPREL);
   1156   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1157 
   1158   // Address of the thread local variable is the add of thread
   1159   // pointer and the offset of the variable.
   1160   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
   1161 }
   1162 
   1163 //
   1164 // Lower using the general dynamic model for TLS addresses
   1165 //
   1166 SDValue
   1167 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   1168       SelectionDAG &DAG) const {
   1169   SDLoc dl(GA);
   1170   int64_t Offset = GA->getOffset();
   1171   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1172 
   1173   // First generate the TLS symbol address
   1174   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
   1175                                            HexagonII::MO_GDGOT);
   1176 
   1177   // Then, generate the GOT pointer
   1178   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
   1179 
   1180   // Add the TLS symbol and the GOT pointer
   1181   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1182   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
   1183 
   1184   // Copy over the argument to R0
   1185   SDValue InFlag;
   1186   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
   1187   InFlag = Chain.getValue(1);
   1188 
   1189   unsigned Flags =
   1190       static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
   1191           ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
   1192           : HexagonII::MO_GDPLT;
   1193 
   1194   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
   1195                            Hexagon::R0, Flags);
   1196 }
   1197 
   1198 //
   1199 // Lower TLS addresses.
   1200 //
   1201 // For now for dynamic models, we only support the general dynamic model.
   1202 //
   1203 SDValue
   1204 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   1205       SelectionDAG &DAG) const {
   1206   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   1207 
   1208   switch (HTM.getTLSModel(GA->getGlobal())) {
   1209     case TLSModel::GeneralDynamic:
   1210     case TLSModel::LocalDynamic:
   1211       return LowerToTLSGeneralDynamicModel(GA, DAG);
   1212     case TLSModel::InitialExec:
   1213       return LowerToTLSInitialExecModel(GA, DAG);
   1214     case TLSModel::LocalExec:
   1215       return LowerToTLSLocalExecModel(GA, DAG);
   1216   }
   1217   llvm_unreachable("Bogus TLS model");
   1218 }
   1219 
   1220 //===----------------------------------------------------------------------===//
   1221 // TargetLowering Implementation
   1222 //===----------------------------------------------------------------------===//
   1223 
   1224 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   1225                                              const HexagonSubtarget &ST)
   1226     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
   1227       Subtarget(ST) {
   1228   bool IsV4 = !Subtarget.hasV5Ops();
   1229   auto &HRI = *Subtarget.getRegisterInfo();
   1230 
   1231   setPrefLoopAlignment(4);
   1232   setPrefFunctionAlignment(4);
   1233   setMinFunctionAlignment(2);
   1234   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
   1235   setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
   1236   setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
   1237 
   1238   setMaxAtomicSizeInBitsSupported(64);
   1239   setMinCmpXchgSizeInBits(32);
   1240 
   1241   if (EnableHexSDNodeSched)
   1242     setSchedulingPreference(Sched::VLIW);
   1243   else
   1244     setSchedulingPreference(Sched::Source);
   1245 
   1246   // Limits for inline expansion of memcpy/memmove
   1247   MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
   1248   MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
   1249   MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
   1250   MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
   1251   MaxStoresPerMemset = MaxStoresPerMemsetCL;
   1252   MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
   1253 
   1254   //
   1255   // Set up register classes.
   1256   //
   1257 
   1258   addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
   1259   addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
   1260   addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
   1261   addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
   1262   addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
   1263   addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
   1264   addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
   1265   addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
   1266   addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
   1267   addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
   1268   addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
   1269 
   1270   if (Subtarget.hasV5Ops()) {
   1271     addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
   1272     addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
   1273   }
   1274 
   1275   //
   1276   // Handling of scalar operations.
   1277   //
   1278   // All operations default to "legal", except:
   1279   // - indexed loads and stores (pre-/post-incremented),
   1280   // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
   1281   //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
   1282   //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
   1283   //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
   1284   // which default to "expand" for at least one type.
   1285 
   1286   // Misc operations.
   1287   setOperationAction(ISD::ConstantFP, MVT::f32, Legal); // Default: expand
   1288   setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
   1289 
   1290   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
   1291   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
   1292   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
   1293   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
   1294   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
   1295   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
   1296   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
   1297   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   1298   setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
   1299   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
   1300   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
   1301   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
   1302 
   1303   // Custom legalize GlobalAddress nodes into CONST32.
   1304   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   1305   setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
   1306   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
   1307 
   1308   // Hexagon needs to optimize cases with negative constants.
   1309   setOperationAction(ISD::SETCC, MVT::i8,    Custom);
   1310   setOperationAction(ISD::SETCC, MVT::i16,   Custom);
   1311   setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
   1312   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
   1313 
   1314   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
   1315   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   1316   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
   1317   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
   1318   setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
   1319 
   1320   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   1321   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   1322   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   1323 
   1324   if (EmitJumpTables)
   1325     setMinimumJumpTableEntries(MinimumJumpTables);
   1326   else
   1327     setMinimumJumpTableEntries(std::numeric_limits<int>::max());
   1328   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   1329 
   1330   setOperationAction(ISD::ABS, MVT::i32, Legal);
   1331   setOperationAction(ISD::ABS, MVT::i64, Legal);
   1332 
   1333   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
   1334   // but they only operate on i64.
   1335   for (MVT VT : MVT::integer_valuetypes()) {
   1336     setOperationAction(ISD::UADDO,    VT, Expand);
   1337     setOperationAction(ISD::USUBO,    VT, Expand);
   1338     setOperationAction(ISD::SADDO,    VT, Expand);
   1339     setOperationAction(ISD::SSUBO,    VT, Expand);
   1340     setOperationAction(ISD::ADDCARRY, VT, Expand);
   1341     setOperationAction(ISD::SUBCARRY, VT, Expand);
   1342   }
   1343   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
   1344   setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
   1345 
   1346   setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
   1347   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
   1348   setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
   1349   setOperationAction(ISD::CTTZ, MVT::i16, Promote);
   1350 
   1351   // In V5, popcount can count # of 1s in i64 but returns i32.
   1352   // On V4 it will be expanded (set later).
   1353   setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
   1354   setOperationAction(ISD::CTPOP, MVT::i16, Promote);
   1355   setOperationAction(ISD::CTPOP, MVT::i32, Promote);
   1356   setOperationAction(ISD::CTPOP, MVT::i64, Legal);
   1357 
   1358   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   1359   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
   1360   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
   1361   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
   1362 
   1363   for (unsigned IntExpOp :
   1364        {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
   1365         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
   1366         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
   1367         ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
   1368     for (MVT VT : MVT::integer_valuetypes())
   1369       setOperationAction(IntExpOp, VT, Expand);
   1370   }
   1371 
   1372   for (unsigned FPExpOp :
   1373        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
   1374         ISD::FPOW, ISD::FCOPYSIGN}) {
   1375     for (MVT VT : MVT::fp_valuetypes())
   1376       setOperationAction(FPExpOp, VT, Expand);
   1377   }
   1378 
   1379   // No extending loads from i32.
   1380   for (MVT VT : MVT::integer_valuetypes()) {
   1381     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
   1382     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
   1383     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
   1384   }
   1385   // Turn FP truncstore into trunc + store.
   1386   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   1387   // Turn FP extload into load/fpextend.
   1388   for (MVT VT : MVT::fp_valuetypes())
   1389     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
   1390 
   1391   // Expand BR_CC and SELECT_CC for all integer and fp types.
   1392   for (MVT VT : MVT::integer_valuetypes()) {
   1393     setOperationAction(ISD::BR_CC,     VT, Expand);
   1394     setOperationAction(ISD::SELECT_CC, VT, Expand);
   1395   }
   1396   for (MVT VT : MVT::fp_valuetypes()) {
   1397     setOperationAction(ISD::BR_CC,     VT, Expand);
   1398     setOperationAction(ISD::SELECT_CC, VT, Expand);
   1399   }
   1400   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
   1401 
   1402   //
   1403   // Handling of vector operations.
   1404   //
   1405 
   1406   promoteLdStType(MVT::v4i8,  MVT::i32);
   1407   promoteLdStType(MVT::v2i16, MVT::i32);
   1408   promoteLdStType(MVT::v8i8,  MVT::i64);
   1409   promoteLdStType(MVT::v4i16, MVT::i64);
   1410   promoteLdStType(MVT::v2i32, MVT::i64);
   1411 
   1412   // Set the action for vector operations to "expand", then override it with
   1413   // either "custom" or "legal" for specific cases.
   1414   static const unsigned VectExpOps[] = {
   1415     // Integer arithmetic:
   1416     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
   1417     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
   1418     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
   1419     // Logical/bit:
   1420     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
   1421     ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
   1422     // Floating point arithmetic/math functions:
   1423     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
   1424     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
   1425     ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
   1426     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
   1427     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
   1428     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
   1429     // Misc:
   1430     ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
   1431     // Vector:
   1432     ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
   1433     ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
   1434     ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
   1435     ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE
   1436   };
   1437 
   1438   for (MVT VT : MVT::vector_valuetypes()) {
   1439     for (unsigned VectExpOp : VectExpOps)
   1440       setOperationAction(VectExpOp, VT, Expand);
   1441 
   1442     // Expand all extending loads and truncating stores:
   1443     for (MVT TargetVT : MVT::vector_valuetypes()) {
   1444       if (TargetVT == VT)
   1445         continue;
   1446       setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
   1447       setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
   1448       setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
   1449       setTruncStoreAction(VT, TargetVT, Expand);
   1450     }
   1451 
   1452     // Normalize all inputs to SELECT to be vectors of i32.
   1453     if (VT.getVectorElementType() != MVT::i32) {
   1454       MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
   1455       setOperationAction(ISD::SELECT, VT, Promote);
   1456       AddPromotedToType(ISD::SELECT, VT, VT32);
   1457     }
   1458     setOperationAction(ISD::SRA, VT, Custom);
   1459     setOperationAction(ISD::SHL, VT, Custom);
   1460     setOperationAction(ISD::SRL, VT, Custom);
   1461   }
   1462 
   1463   // Extending loads from (native) vectors of i8 into (native) vectors of i16
   1464   // are legal.
   1465   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
   1466   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
   1467   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
   1468   setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
   1469   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   1470   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   1471 
   1472   // Types natively supported:
   1473   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
   1474                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
   1475     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
   1476     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
   1477     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
   1478     setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
   1479     setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
   1480     setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
   1481 
   1482     setOperationAction(ISD::ADD, NativeVT, Legal);
   1483     setOperationAction(ISD::SUB, NativeVT, Legal);
   1484     setOperationAction(ISD::MUL, NativeVT, Legal);
   1485     setOperationAction(ISD::AND, NativeVT, Legal);
   1486     setOperationAction(ISD::OR,  NativeVT, Legal);
   1487     setOperationAction(ISD::XOR, NativeVT, Legal);
   1488   }
   1489 
   1490   // Custom lower unaligned loads.
   1491   for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
   1492                     MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
   1493     setOperationAction(ISD::LOAD, VecVT, Custom);
   1494   }
   1495 
   1496   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
   1497     setCondCodeAction(ISD::SETLT,  VT, Expand);
   1498     setCondCodeAction(ISD::SETLE,  VT, Expand);
   1499     setCondCodeAction(ISD::SETULT, VT, Expand);
   1500     setCondCodeAction(ISD::SETULE, VT, Expand);
   1501   }
   1502 
   1503   // Custom-lower bitcasts from i8 to v8i1.
   1504   setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
   1505   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
   1506   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
   1507   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
   1508   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
   1509   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
   1510 
   1511   // Subtarget-specific operation actions.
   1512   //
   1513   if (Subtarget.hasV60Ops()) {
   1514     setOperationAction(ISD::ROTL, MVT::i32, Custom);
   1515     setOperationAction(ISD::ROTL, MVT::i64, Custom);
   1516   }
   1517   if (Subtarget.hasV5Ops()) {
   1518     setOperationAction(ISD::FMA,  MVT::f64, Expand);
   1519     setOperationAction(ISD::FADD, MVT::f64, Expand);
   1520     setOperationAction(ISD::FSUB, MVT::f64, Expand);
   1521     setOperationAction(ISD::FMUL, MVT::f64, Expand);
   1522 
   1523     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   1524     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
   1525 
   1526     setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
   1527     setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
   1528     setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
   1529     setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
   1530     setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
   1531     setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
   1532     setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
   1533     setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
   1534     setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
   1535     setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
   1536     setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
   1537     setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
   1538   } else { // V4
   1539     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
   1540     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
   1541     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
   1542     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
   1543     setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
   1544     setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
   1545     setOperationAction(ISD::FP_EXTEND,  MVT::f32, Expand);
   1546     setOperationAction(ISD::FP_ROUND,   MVT::f64, Expand);
   1547     setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
   1548 
   1549     setOperationAction(ISD::CTPOP, MVT::i8,  Expand);
   1550     setOperationAction(ISD::CTPOP, MVT::i16, Expand);
   1551     setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   1552     setOperationAction(ISD::CTPOP, MVT::i64, Expand);
   1553 
   1554     // Expand these operations for both f32 and f64:
   1555     for (unsigned FPExpOpV4 :
   1556          {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FABS, ISD::FNEG, ISD::FMA}) {
   1557       setOperationAction(FPExpOpV4, MVT::f32, Expand);
   1558       setOperationAction(FPExpOpV4, MVT::f64, Expand);
   1559     }
   1560 
   1561     for (ISD::CondCode FPExpCCV4 :
   1562          {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
   1563           ISD::SETUO,  ISD::SETO}) {
   1564       setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
   1565       setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
   1566     }
   1567   }
   1568 
   1569   // Handling of indexed loads/stores: default is "expand".
   1570   //
   1571   for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
   1572                  MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
   1573     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
   1574     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
   1575   }
   1576 
   1577   if (Subtarget.useHVXOps())
   1578     initializeHVXLowering();
   1579 
   1580   computeRegisterProperties(&HRI);
   1581 
   1582   //
   1583   // Library calls for unsupported operations
   1584   //
   1585   bool FastMath  = EnableFastMath;
   1586 
   1587   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
   1588   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
   1589   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
   1590   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
   1591   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
   1592   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
   1593   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
   1594   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
   1595 
   1596   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
   1597   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
   1598   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
   1599   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
   1600   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
   1601   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
   1602 
   1603   if (IsV4) {
   1604     // Handle single-precision floating point operations on V4.
   1605     if (FastMath) {
   1606       setLibcallName(RTLIB::ADD_F32, "__hexagon_fast_addsf3");
   1607       setLibcallName(RTLIB::SUB_F32, "__hexagon_fast_subsf3");
   1608       setLibcallName(RTLIB::MUL_F32, "__hexagon_fast_mulsf3");
   1609       setLibcallName(RTLIB::OGT_F32, "__hexagon_fast_gtsf2");
   1610       setLibcallName(RTLIB::OLT_F32, "__hexagon_fast_ltsf2");
   1611       // Double-precision compares.
   1612       setLibcallName(RTLIB::OGT_F64, "__hexagon_fast_gtdf2");
   1613       setLibcallName(RTLIB::OLT_F64, "__hexagon_fast_ltdf2");
   1614     } else {
   1615       setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
   1616       setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
   1617       setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
   1618       setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
   1619       setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
   1620       // Double-precision compares.
   1621       setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
   1622       setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
   1623     }
   1624   }
   1625 
   1626   // This is the only fast library function for sqrtd.
   1627   if (FastMath)
   1628     setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
   1629 
   1630   // Prefix is: nothing  for "slow-math",
   1631   //            "fast2_" for V4 fast-math and V5+ fast-math double-precision
   1632   // (actually, keep fast-math and fast-math2 separate for now)
   1633   if (FastMath) {
   1634     setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
   1635     setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
   1636     setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
   1637     setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
   1638     // Calling __hexagon_fast2_divsf3 with fast-math on V5 (ok).
   1639     setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
   1640   } else {
   1641     setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
   1642     setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
   1643     setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
   1644     setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
   1645     setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
   1646   }
   1647 
   1648   if (Subtarget.hasV5Ops()) {
   1649     if (FastMath)
   1650       setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
   1651     else
   1652       setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
   1653   } else {
   1654     // V4
   1655     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
   1656     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
   1657     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
   1658     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
   1659     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
   1660     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
   1661     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
   1662     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
   1663     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
   1664     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
   1665     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
   1666     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
   1667     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
   1668     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
   1669     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
   1670     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
   1671     setLibcallName(RTLIB::FPEXT_F32_F64,    "__hexagon_extendsfdf2");
   1672     setLibcallName(RTLIB::FPROUND_F64_F32,  "__hexagon_truncdfsf2");
   1673     setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
   1674     setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
   1675     setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
   1676     setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
   1677     setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
   1678     setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
   1679     setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
   1680     setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
   1681     setLibcallName(RTLIB::UO_F32,  "__hexagon_unordsf2");
   1682     setLibcallName(RTLIB::UO_F64,  "__hexagon_unorddf2");
   1683     setLibcallName(RTLIB::O_F32,   "__hexagon_unordsf2");
   1684     setLibcallName(RTLIB::O_F64,   "__hexagon_unorddf2");
   1685   }
   1686 
   1687   // These cause problems when the shift amount is non-constant.
   1688   setLibcallName(RTLIB::SHL_I128, nullptr);
   1689   setLibcallName(RTLIB::SRL_I128, nullptr);
   1690   setLibcallName(RTLIB::SRA_I128, nullptr);
   1691 }
   1692 
   1693 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   1694   switch ((HexagonISD::NodeType)Opcode) {
   1695   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
   1696   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
   1697   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
   1698   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
   1699   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
   1700   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
   1701   case HexagonISD::CALL:          return "HexagonISD::CALL";
   1702   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
   1703   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
   1704   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
   1705   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
   1706   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
   1707   case HexagonISD::CP:            return "HexagonISD::CP";
   1708   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
   1709   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
   1710   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
   1711   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
   1712   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
   1713   case HexagonISD::JT:            return "HexagonISD::JT";
   1714   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
   1715   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
   1716   case HexagonISD::VASL:          return "HexagonISD::VASL";
   1717   case HexagonISD::VASR:          return "HexagonISD::VASR";
   1718   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
   1719   case HexagonISD::VSPLAT:        return "HexagonISD::VSPLAT";
   1720   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
   1721   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
   1722   case HexagonISD::VROR:          return "HexagonISD::VROR";
   1723   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
   1724   case HexagonISD::VZERO:         return "HexagonISD::VZERO";
   1725   case HexagonISD::VSPLATW:       return "HexagonISD::VSPLATW";
   1726   case HexagonISD::D2P:           return "HexagonISD::D2P";
   1727   case HexagonISD::P2D:           return "HexagonISD::P2D";
   1728   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
   1729   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
   1730   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
   1731   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
   1732   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
   1733   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
   1734   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
   1735   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
   1736   case HexagonISD::OP_END:        break;
   1737   }
   1738   return nullptr;
   1739 }
   1740 
   1741 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
   1742 // intrinsic.
   1743 static bool isBrevLdIntrinsic(const Value *Inst) {
   1744   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
   1745   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
   1746           ID == Intrinsic::hexagon_L2_loadri_pbr ||
   1747           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
   1748           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
   1749           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
   1750           ID == Intrinsic::hexagon_L2_loadrub_pbr);
   1751 }
   1752 
   1753 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
   1754 // instruction. So far we only handle bitcast, extract value and bit reverse
   1755 // load intrinsic instructions. Should we handle CGEP ?
   1756 static Value *getBrevLdObject(Value *V) {
   1757   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
   1758       Operator::getOpcode(V) == Instruction::BitCast)
   1759     V = cast<Operator>(V)->getOperand(0);
   1760   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
   1761     V = cast<Instruction>(V)->getOperand(0);
   1762   return V;
   1763 }
   1764 
   1765 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
   1766 // a back edge. If the back edge comes from the intrinsic itself, the incoming
   1767 // edge is returned.
   1768 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
   1769   const BasicBlock *Parent = PN->getParent();
   1770   int Idx = -1;
   1771   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
   1772     BasicBlock *Blk = PN->getIncomingBlock(i);
   1773     // Determine if the back edge is originated from intrinsic.
   1774     if (Blk == Parent) {
   1775       Value *BackEdgeVal = PN->getIncomingValue(i);
   1776       Value *BaseVal;
   1777       // Loop over till we return the same Value or we hit the IntrBaseVal.
   1778       do {
   1779         BaseVal = BackEdgeVal;
   1780         BackEdgeVal = getBrevLdObject(BackEdgeVal);
   1781       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
   1782       // If the getBrevLdObject returns IntrBaseVal, we should return the
   1783       // incoming edge.
   1784       if (IntrBaseVal == BackEdgeVal)
   1785         continue;
   1786       Idx = i;
   1787       break;
   1788     } else // Set the node to incoming edge.
   1789       Idx = i;
   1790   }
   1791   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
   1792   return PN->getIncomingValue(Idx);
   1793 }
   1794 
   1795 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
   1796 // pointer points to, for the bit-reverse load intrinsic. Setting this to
   1797 // memoperand might help alias analysis to figure out the dependencies.
   1798 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
   1799   Value *IntrBaseVal = V;
   1800   Value *BaseVal;
   1801   // Loop over till we return the same Value, implies we either figure out
   1802   // the object or we hit a PHI
   1803   do {
   1804     BaseVal = V;
   1805     V = getBrevLdObject(V);
   1806   } while (BaseVal != V);
   1807 
   1808   // Identify the object from PHINode.
   1809   if (const PHINode *PN = dyn_cast<PHINode>(V))
   1810     return returnEdge(PN, IntrBaseVal);
   1811   // For non PHI nodes, the object is the last value returned by getBrevLdObject
   1812   else
   1813     return V;
   1814 }
   1815 
   1816 /// Given an intrinsic, checks if on the target the intrinsic will need to map
   1817 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
   1818 /// true and store the intrinsic information into the IntrinsicInfo that was
   1819 /// passed to the function.
   1820 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   1821                                                const CallInst &I,
   1822                                                MachineFunction &MF,
   1823                                                unsigned Intrinsic) const {
   1824   switch (Intrinsic) {
   1825   case Intrinsic::hexagon_L2_loadrd_pbr:
   1826   case Intrinsic::hexagon_L2_loadri_pbr:
   1827   case Intrinsic::hexagon_L2_loadrh_pbr:
   1828   case Intrinsic::hexagon_L2_loadruh_pbr:
   1829   case Intrinsic::hexagon_L2_loadrb_pbr:
   1830   case Intrinsic::hexagon_L2_loadrub_pbr: {
   1831     Info.opc = ISD::INTRINSIC_W_CHAIN;
   1832     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
   1833     auto &Cont = I.getCalledFunction()->getParent()->getContext();
   1834     // The intrinsic function call is of the form { ElTy, i8* }
   1835     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
   1836     // should be derived from ElTy.
   1837     PointerType *PtrTy = I.getCalledFunction()
   1838                              ->getReturnType()
   1839                              ->getContainedType(0)
   1840                              ->getPointerTo();
   1841     Info.memVT = MVT::getVT(PtrTy->getElementType());
   1842     llvm::Value *BasePtrVal = I.getOperand(0);
   1843     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
   1844     // The offset value comes through Modifier register. For now, assume the
   1845     // offset is 0.
   1846     Info.offset = 0;
   1847     Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont));
   1848     Info.flags = MachineMemOperand::MOLoad;
   1849     return true;
   1850   }
   1851   case Intrinsic::hexagon_V6_vgathermw:
   1852   case Intrinsic::hexagon_V6_vgathermw_128B:
   1853   case Intrinsic::hexagon_V6_vgathermh:
   1854   case Intrinsic::hexagon_V6_vgathermh_128B:
   1855   case Intrinsic::hexagon_V6_vgathermhw:
   1856   case Intrinsic::hexagon_V6_vgathermhw_128B:
   1857   case Intrinsic::hexagon_V6_vgathermwq:
   1858   case Intrinsic::hexagon_V6_vgathermwq_128B:
   1859   case Intrinsic::hexagon_V6_vgathermhq:
   1860   case Intrinsic::hexagon_V6_vgathermhq_128B:
   1861   case Intrinsic::hexagon_V6_vgathermhwq:
   1862   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
   1863     const Module &M = *I.getParent()->getParent()->getParent();
   1864     Info.opc = ISD::INTRINSIC_W_CHAIN;
   1865     Type *VecTy = I.getArgOperand(1)->getType();
   1866     Info.memVT = MVT::getVT(VecTy);
   1867     Info.ptrVal = I.getArgOperand(0);
   1868     Info.offset = 0;
   1869     Info.align = M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8;
   1870     Info.flags = MachineMemOperand::MOLoad |
   1871                  MachineMemOperand::MOStore |
   1872                  MachineMemOperand::MOVolatile;
   1873     return true;
   1874   }
   1875   default:
   1876     break;
   1877   }
   1878   return false;
   1879 }
   1880 
   1881 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   1882   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
   1883 }
   1884 
   1885 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   1886   if (!VT1.isSimple() || !VT2.isSimple())
   1887     return false;
   1888   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
   1889 }
   1890 
   1891 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   1892   return isOperationLegalOrCustom(ISD::FMA, VT);
   1893 }
   1894 
   1895 // Should we expand the build vector with shuffles?
   1896 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
   1897       unsigned DefinedValues) const {
   1898   return false;
   1899 }
   1900 
   1901 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
   1902                                                EVT VT) const {
   1903   return true;
   1904 }
   1905 
   1906 TargetLoweringBase::LegalizeTypeAction
   1907 HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
   1908   if (VT.getVectorNumElements() == 1)
   1909     return TargetLoweringBase::TypeScalarizeVector;
   1910 
   1911   // Always widen vectors of i1.
   1912   MVT ElemTy = VT.getSimpleVT().getVectorElementType();
   1913   if (ElemTy == MVT::i1)
   1914     return TargetLoweringBase::TypeWidenVector;
   1915 
   1916   if (Subtarget.useHVXOps()) {
   1917     // If the size of VT is at least half of the vector length,
   1918     // widen the vector. Note: the threshold was not selected in
   1919     // any scientific way.
   1920     ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
   1921     if (llvm::find(Tys, ElemTy) != Tys.end()) {
   1922       unsigned HwWidth = 8*Subtarget.getVectorLength();
   1923       unsigned VecWidth = VT.getSizeInBits();
   1924       if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
   1925         return TargetLoweringBase::TypeWidenVector;
   1926     }
   1927   }
   1928   return TargetLoweringBase::TypeSplitVector;
   1929 }
   1930 
   1931 std::pair<SDValue, int>
   1932 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
   1933   if (Addr.getOpcode() == ISD::ADD) {
   1934     SDValue Op1 = Addr.getOperand(1);
   1935     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
   1936       return { Addr.getOperand(0), CN->getSExtValue() };
   1937   }
   1938   return { Addr, 0 };
   1939 }
   1940 
   1941 // Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
   1942 // to select data from, V3 is the permutation.
   1943 SDValue
   1944 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
   1945       const {
   1946   const auto *SVN = cast<ShuffleVectorSDNode>(Op);
   1947   ArrayRef<int> AM = SVN->getMask();
   1948   assert(AM.size() <= 8 && "Unexpected shuffle mask");
   1949   unsigned VecLen = AM.size();
   1950 
   1951   MVT VecTy = ty(Op);
   1952   assert(!Subtarget.isHVXVectorType(VecTy, true) &&
   1953          "HVX shuffles should be legal");
   1954   assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
   1955 
   1956   SDValue Op0 = Op.getOperand(0);
   1957   SDValue Op1 = Op.getOperand(1);
   1958   const SDLoc &dl(Op);
   1959 
   1960   // If the inputs are not the same as the output, bail. This is not an
   1961   // error situation, but complicates the handling and the default expansion
   1962   // (into BUILD_VECTOR) should be adequate.
   1963   if (ty(Op0) != VecTy || ty(Op1) != VecTy)
   1964     return SDValue();
   1965 
   1966   // Normalize the mask so that the first non-negative index comes from
   1967   // the first operand.
   1968   SmallVector<int,8> Mask(AM.begin(), AM.end());
   1969   unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
   1970   if (F == AM.size())
   1971     return DAG.getUNDEF(VecTy);
   1972   if (AM[F] >= int(VecLen)) {
   1973     ShuffleVectorSDNode::commuteMask(Mask);
   1974     std::swap(Op0, Op1);
   1975   }
   1976 
   1977   // Express the shuffle mask in terms of bytes.
   1978   SmallVector<int,8> ByteMask;
   1979   unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
   1980   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
   1981     int M = Mask[i];
   1982     if (M < 0) {
   1983       for (unsigned j = 0; j != ElemBytes; ++j)
   1984         ByteMask.push_back(-1);
   1985     } else {
   1986       for (unsigned j = 0; j != ElemBytes; ++j)
   1987         ByteMask.push_back(M*ElemBytes + j);
   1988     }
   1989   }
   1990   assert(ByteMask.size() <= 8);
   1991 
   1992   // All non-undef (non-negative) indexes are well within [0..127], so they
   1993   // fit in a single byte. Build two 64-bit words:
   1994   // - MaskIdx where each byte is the corresponding index (for non-negative
   1995   //   indexes), and 0xFF for negative indexes, and
   1996   // - MaskUnd that has 0xFF for each negative index.
   1997   uint64_t MaskIdx = 0;
   1998   uint64_t MaskUnd = 0;
   1999   for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
   2000     unsigned S = 8*i;
   2001     uint64_t M = ByteMask[i] & 0xFF;
   2002     if (M == 0xFF)
   2003       MaskUnd |= M << S;
   2004     MaskIdx |= M << S;
   2005   }
   2006 
   2007   if (ByteMask.size() == 4) {
   2008     // Identity.
   2009     if (MaskIdx == (0x03020100 | MaskUnd))
   2010       return Op0;
   2011     // Byte swap.
   2012     if (MaskIdx == (0x00010203 | MaskUnd)) {
   2013       SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
   2014       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
   2015       return DAG.getBitcast(VecTy, T1);
   2016     }
   2017 
   2018     // Byte packs.
   2019     SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
   2020                                    typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
   2021     if (MaskIdx == (0x06040200 | MaskUnd))
   2022       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
   2023     if (MaskIdx == (0x07050301 | MaskUnd))
   2024       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
   2025 
   2026     SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
   2027                                    typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
   2028     if (MaskIdx == (0x02000604 | MaskUnd))
   2029       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
   2030     if (MaskIdx == (0x03010705 | MaskUnd))
   2031       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
   2032   }
   2033 
   2034   if (ByteMask.size() == 8) {
   2035     // Identity.
   2036     if (MaskIdx == (0x0706050403020100ull | MaskUnd))
   2037       return Op0;
   2038     // Byte swap.
   2039     if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
   2040       SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
   2041       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
   2042       return DAG.getBitcast(VecTy, T1);
   2043     }
   2044 
   2045     // Halfword picks.
   2046     if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
   2047       return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
   2048     if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
   2049       return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
   2050     if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
   2051       return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
   2052     if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
   2053       return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
   2054     if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
   2055       VectorPair P = opSplit(Op0, dl, DAG);
   2056       return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
   2057     }
   2058 
   2059     // Byte packs.
   2060     if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
   2061       return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
   2062     if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
   2063       return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
   2064   }
   2065 
   2066   return SDValue();
   2067 }
   2068 
   2069 // Create a Hexagon-specific node for shifting a vector by an integer.
   2070 SDValue
   2071 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
   2072       const {
   2073   if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
   2074     if (SDValue S = BVN->getSplatValue()) {
   2075       unsigned NewOpc;
   2076       switch (Op.getOpcode()) {
   2077         case ISD::SHL:
   2078           NewOpc = HexagonISD::VASL;
   2079           break;
   2080         case ISD::SRA:
   2081           NewOpc = HexagonISD::VASR;
   2082           break;
   2083         case ISD::SRL:
   2084           NewOpc = HexagonISD::VLSR;
   2085           break;
   2086         default:
   2087           llvm_unreachable("Unexpected shift opcode");
   2088       }
   2089       return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
   2090     }
   2091   }
   2092 
   2093   return SDValue();
   2094 }
   2095 
   2096 SDValue
   2097 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
   2098   return getVectorShiftByInt(Op, DAG);
   2099 }
   2100 
   2101 SDValue
   2102 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
   2103   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
   2104     return Op;
   2105   return SDValue();
   2106 }
   2107 
   2108 SDValue
   2109 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   2110   MVT ResTy = ty(Op);
   2111   SDValue InpV = Op.getOperand(0);
   2112   MVT InpTy = ty(InpV);
   2113   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
   2114   const SDLoc &dl(Op);
   2115 
   2116   // Handle conversion from i8 to v8i1.
   2117   if (ResTy == MVT::v8i1) {
   2118     SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
   2119     SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
   2120     return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
   2121   }
   2122 
   2123   return SDValue();
   2124 }
   2125 
   2126 bool
   2127 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
   2128       MVT VecTy, SelectionDAG &DAG,
   2129       MutableArrayRef<ConstantInt*> Consts) const {
   2130   MVT ElemTy = VecTy.getVectorElementType();
   2131   unsigned ElemWidth = ElemTy.getSizeInBits();
   2132   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
   2133   bool AllConst = true;
   2134 
   2135   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
   2136     SDValue V = Values[i];
   2137     if (V.isUndef()) {
   2138       Consts[i] = ConstantInt::get(IntTy, 0);
   2139       continue;
   2140     }
   2141     // Make sure to always cast to IntTy.
   2142     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
   2143       const ConstantInt *CI = CN->getConstantIntValue();
   2144       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
   2145     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
   2146       const ConstantFP *CF = CN->getConstantFPValue();
   2147       APInt A = CF->getValueAPF().bitcastToAPInt();
   2148       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
   2149     } else {
   2150       AllConst = false;
   2151     }
   2152   }
   2153   return AllConst;
   2154 }
   2155 
   2156 SDValue
   2157 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
   2158                                      MVT VecTy, SelectionDAG &DAG) const {
   2159   MVT ElemTy = VecTy.getVectorElementType();
   2160   assert(VecTy.getVectorNumElements() == Elem.size());
   2161 
   2162   SmallVector<ConstantInt*,4> Consts(Elem.size());
   2163   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
   2164 
   2165   unsigned First, Num = Elem.size();
   2166   for (First = 0; First != Num; ++First)
   2167     if (!isUndef(Elem[First]))
   2168       break;
   2169   if (First == Num)
   2170     return DAG.getUNDEF(VecTy);
   2171 
   2172   if (AllConst &&
   2173       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
   2174     return getZero(dl, VecTy, DAG);
   2175 
   2176   if (ElemTy == MVT::i16) {
   2177     assert(Elem.size() == 2);
   2178     if (AllConst) {
   2179       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
   2180                    Consts[1]->getZExtValue() << 16;
   2181       return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
   2182     }
   2183     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
   2184                          {Elem[1], Elem[0]}, DAG);
   2185     return DAG.getBitcast(MVT::v2i16, N);
   2186   }
   2187 
   2188   if (ElemTy == MVT::i8) {
   2189     // First try generating a constant.
   2190     if (AllConst) {
   2191       int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
   2192                   (Consts[1]->getZExtValue() & 0xFF) << 8 |
   2193                   (Consts[1]->getZExtValue() & 0xFF) << 16 |
   2194                   Consts[2]->getZExtValue() << 24;
   2195       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
   2196     }
   2197 
   2198     // Then try splat.
   2199     bool IsSplat = true;
   2200     for (unsigned i = 0; i != Num; ++i) {
   2201       if (i == First)
   2202         continue;
   2203       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
   2204         continue;
   2205       IsSplat = false;
   2206       break;
   2207     }
   2208     if (IsSplat) {
   2209       // Legalize the operand to VSPLAT.
   2210       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
   2211       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
   2212     }
   2213 
   2214     // Generate
   2215     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
   2216     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
   2217     assert(Elem.size() == 4);
   2218     SDValue Vs[4];
   2219     for (unsigned i = 0; i != 4; ++i) {
   2220       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
   2221       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
   2222     }
   2223     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
   2224     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
   2225     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
   2226     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
   2227     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
   2228 
   2229     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
   2230     return DAG.getBitcast(MVT::v4i8, R);
   2231   }
   2232 
   2233 #ifndef NDEBUG
   2234   dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
   2235 #endif
   2236   llvm_unreachable("Unexpected vector element type");
   2237 }
   2238 
   2239 SDValue
   2240 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
   2241                                      MVT VecTy, SelectionDAG &DAG) const {
   2242   MVT ElemTy = VecTy.getVectorElementType();
   2243   assert(VecTy.getVectorNumElements() == Elem.size());
   2244 
   2245   SmallVector<ConstantInt*,8> Consts(Elem.size());
   2246   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
   2247 
   2248   unsigned First, Num = Elem.size();
   2249   for (First = 0; First != Num; ++First)
   2250     if (!isUndef(Elem[First]))
   2251       break;
   2252   if (First == Num)
   2253     return DAG.getUNDEF(VecTy);
   2254 
   2255   if (AllConst &&
   2256       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
   2257     return getZero(dl, VecTy, DAG);
   2258 
   2259   // First try splat if possible.
   2260   if (ElemTy == MVT::i16) {
   2261     bool IsSplat = true;
   2262     for (unsigned i = 0; i != Num; ++i) {
   2263       if (i == First)
   2264         continue;
   2265       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
   2266         continue;
   2267       IsSplat = false;
   2268       break;
   2269     }
   2270     if (IsSplat) {
   2271       // Legalize the operand to VSPLAT.
   2272       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
   2273       return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext);
   2274     }
   2275   }
   2276 
   2277   // Then try constant.
   2278   if (AllConst) {
   2279     uint64_t Val = 0;
   2280     unsigned W = ElemTy.getSizeInBits();
   2281     uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
   2282                   : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
   2283     for (unsigned i = 0; i != Num; ++i)
   2284       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
   2285     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
   2286     return DAG.getBitcast(VecTy, V0);
   2287   }
   2288 
   2289   // Build two 32-bit vectors and concatenate.
   2290   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
   2291   SDValue L = (ElemTy == MVT::i32)
   2292                 ? Elem[0]
   2293                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
   2294   SDValue H = (ElemTy == MVT::i32)
   2295                 ? Elem[1]
   2296                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
   2297   return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
   2298 }
   2299 
   2300 SDValue
   2301 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
   2302                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
   2303                                      SelectionDAG &DAG) const {
   2304   MVT VecTy = ty(VecV);
   2305   assert(!ValTy.isVector() ||
   2306          VecTy.getVectorElementType() == ValTy.getVectorElementType());
   2307   unsigned VecWidth = VecTy.getSizeInBits();
   2308   unsigned ValWidth = ValTy.getSizeInBits();
   2309   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
   2310   assert((VecWidth % ElemWidth) == 0);
   2311   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
   2312 
   2313   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
   2314   // without any coprocessors).
   2315   if (ElemWidth == 1) {
   2316     assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
   2317     assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
   2318     // Check if this is an extract of the lowest bit.
   2319     if (IdxN) {
   2320       // Extracting the lowest bit is a no-op, but it changes the type,
   2321       // so it must be kept as an operation to avoid errors related to
   2322       // type mismatches.
   2323       if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
   2324         return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
   2325     }
   2326 
   2327     // If the value extracted is a single bit, use tstbit.
   2328     if (ValWidth == 1) {
   2329       SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
   2330       SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
   2331       SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
   2332       return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
   2333     }
   2334 
   2335     // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
   2336     // a predicate register. The elements of the vector are repeated
   2337     // in the register (if necessary) so that the total number is 8.
   2338     // The extracted subvector will need to be expanded in such a way.
   2339     unsigned Scale = VecWidth / ValWidth;
   2340 
   2341     // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
   2342     // position 0.
   2343     assert(ty(IdxV) == MVT::i32);
   2344     SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2345                              DAG.getConstant(8*Scale, dl, MVT::i32));
   2346     SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
   2347     SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
   2348     while (Scale > 1) {
   2349       // The longest possible subvector is at most 32 bits, so it is always
   2350       // contained in the low subregister.
   2351       T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
   2352       T1 = expandPredicate(T1, dl, DAG);
   2353       Scale /= 2;
   2354     }
   2355 
   2356     return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
   2357   }
   2358 
   2359   assert(VecWidth == 32 || VecWidth == 64);
   2360 
   2361   // Cast everything to scalar integer types.
   2362   MVT ScalarTy = tyScalar(VecTy);
   2363   VecV = DAG.getBitcast(ScalarTy, VecV);
   2364 
   2365   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
   2366   SDValue ExtV;
   2367 
   2368   if (IdxN) {
   2369     unsigned Off = IdxN->getZExtValue() * ElemWidth;
   2370     if (VecWidth == 64 && ValWidth == 32) {
   2371       assert(Off == 0 || Off == 32);
   2372       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
   2373       ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
   2374     } else if (Off == 0 && (ValWidth % 8) == 0) {
   2375       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
   2376     } else {
   2377       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
   2378       // The return type of EXTRACTU must be the same as the type of the
   2379       // input vector.
   2380       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
   2381                          {VecV, WidthV, OffV});
   2382     }
   2383   } else {
   2384     if (ty(IdxV) != MVT::i32)
   2385       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
   2386     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2387                                DAG.getConstant(ElemWidth, dl, MVT::i32));
   2388     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
   2389                        {VecV, WidthV, OffV});
   2390   }
   2391 
   2392   // Cast ExtV to the requested result type.
   2393   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
   2394   ExtV = DAG.getBitcast(ResTy, ExtV);
   2395   return ExtV;
   2396 }
   2397 
   2398 SDValue
   2399 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
   2400                                     const SDLoc &dl, MVT ValTy,
   2401                                     SelectionDAG &DAG) const {
   2402   MVT VecTy = ty(VecV);
   2403   if (VecTy.getVectorElementType() == MVT::i1) {
   2404     MVT ValTy = ty(ValV);
   2405     assert(ValTy.getVectorElementType() == MVT::i1);
   2406     SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
   2407     unsigned VecLen = VecTy.getVectorNumElements();
   2408     unsigned Scale = VecLen / ValTy.getVectorNumElements();
   2409     assert(Scale > 1);
   2410 
   2411     for (unsigned R = Scale; R > 1; R /= 2) {
   2412       ValR = contractPredicate(ValR, dl, DAG);
   2413       ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2414                          DAG.getUNDEF(MVT::i32), ValR);
   2415     }
   2416     // The longest possible subvector is at most 32 bits, so it is always
   2417     // contained in the low subregister.
   2418     ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
   2419 
   2420     unsigned ValBytes = 64 / Scale;
   2421     SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
   2422     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2423                               DAG.getConstant(8, dl, MVT::i32));
   2424     SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
   2425     SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
   2426                               {VecR, ValR, Width, Idx});
   2427     return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
   2428   }
   2429 
   2430   unsigned VecWidth = VecTy.getSizeInBits();
   2431   unsigned ValWidth = ValTy.getSizeInBits();
   2432   assert(VecWidth == 32 || VecWidth == 64);
   2433   assert((VecWidth % ValWidth) == 0);
   2434 
   2435   // Cast everything to scalar integer types.
   2436   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
   2437   // The actual type of ValV may be different than ValTy (which is related
   2438   // to the vector type).
   2439   unsigned VW = ty(ValV).getSizeInBits();
   2440   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
   2441   VecV = DAG.getBitcast(ScalarTy, VecV);
   2442   if (VW != VecWidth)
   2443     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
   2444 
   2445   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
   2446   SDValue InsV;
   2447 
   2448   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
   2449     unsigned W = C->getZExtValue() * ValWidth;
   2450     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
   2451     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
   2452                        {VecV, ValV, WidthV, OffV});
   2453   } else {
   2454     if (ty(IdxV) != MVT::i32)
   2455       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
   2456     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
   2457     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
   2458                        {VecV, ValV, WidthV, OffV});
   2459   }
   2460 
   2461   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
   2462 }
   2463 
   2464 SDValue
   2465 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
   2466                                        SelectionDAG &DAG) const {
   2467   assert(ty(Vec32).getSizeInBits() == 32);
   2468   if (isUndef(Vec32))
   2469     return DAG.getUNDEF(MVT::i64);
   2470   return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
   2471 }
   2472 
   2473 SDValue
   2474 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
   2475                                          SelectionDAG &DAG) const {
   2476   assert(ty(Vec64).getSizeInBits() == 64);
   2477   if (isUndef(Vec64))
   2478     return DAG.getUNDEF(MVT::i32);
   2479   return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
   2480 }
   2481 
   2482 SDValue
   2483 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
   2484       const {
   2485   if (Ty.isVector()) {
   2486     assert(Ty.isInteger() && "Only integer vectors are supported here");
   2487     unsigned W = Ty.getSizeInBits();
   2488     if (W <= 64)
   2489       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
   2490     return DAG.getNode(HexagonISD::VZERO, dl, Ty);
   2491   }
   2492 
   2493   if (Ty.isInteger())
   2494     return DAG.getConstant(0, dl, Ty);
   2495   if (Ty.isFloatingPoint())
   2496     return DAG.getConstantFP(0.0, dl, Ty);
   2497   llvm_unreachable("Invalid type for zero");
   2498 }
   2499 
   2500 SDValue
   2501 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   2502   MVT VecTy = ty(Op);
   2503   unsigned BW = VecTy.getSizeInBits();
   2504   const SDLoc &dl(Op);
   2505   SmallVector<SDValue,8> Ops;
   2506   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
   2507     Ops.push_back(Op.getOperand(i));
   2508 
   2509   if (BW == 32)
   2510     return buildVector32(Ops, dl, VecTy, DAG);
   2511   if (BW == 64)
   2512     return buildVector64(Ops, dl, VecTy, DAG);
   2513 
   2514   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
   2515     // For each i1 element in the resulting predicate register, put 1
   2516     // shifted by the index of the element into a general-purpose register,
   2517     // then or them together and transfer it back into a predicate register.
   2518     SDValue Rs[8];
   2519     SDValue Z = getZero(dl, MVT::i32, DAG);
   2520     // Always produce 8 bits, repeat inputs if necessary.
   2521     unsigned Rep = 8 / VecTy.getVectorNumElements();
   2522     for (unsigned i = 0; i != 8; ++i) {
   2523       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
   2524       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
   2525     }
   2526     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
   2527       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
   2528         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
   2529     }
   2530     // Move the value directly to a predicate register.
   2531     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
   2532   }
   2533 
   2534   return SDValue();
   2535 }
   2536 
   2537 SDValue
   2538 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
   2539                                            SelectionDAG &DAG) const {
   2540   MVT VecTy = ty(Op);
   2541   const SDLoc &dl(Op);
   2542   if (VecTy.getSizeInBits() == 64) {
   2543     assert(Op.getNumOperands() == 2);
   2544     return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
   2545                        Op.getOperand(0));
   2546   }
   2547 
   2548   MVT ElemTy = VecTy.getVectorElementType();
   2549   if (ElemTy == MVT::i1) {
   2550     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
   2551     MVT OpTy = ty(Op.getOperand(0));
   2552     // Scale is how many times the operands need to be contracted to match
   2553     // the representation in the target register.
   2554     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
   2555     assert(Scale == Op.getNumOperands() && Scale > 1);
   2556 
   2557     // First, convert all bool vectors to integers, then generate pairwise
   2558     // inserts to form values of doubled length. Up until there are only
   2559     // two values left to concatenate, all of these values will fit in a
   2560     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
   2561     SmallVector<SDValue,4> Words[2];
   2562     unsigned IdxW = 0;
   2563 
   2564     for (SDValue P : Op.getNode()->op_values()) {
   2565       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
   2566       for (unsigned R = Scale; R > 1; R /= 2) {
   2567         W = contractPredicate(W, dl, DAG);
   2568         W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2569                         DAG.getUNDEF(MVT::i32), W);
   2570       }
   2571       W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
   2572       Words[IdxW].push_back(W);
   2573     }
   2574 
   2575     while (Scale > 2) {
   2576       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
   2577       Words[IdxW ^ 1].clear();
   2578 
   2579       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
   2580         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
   2581         // Insert W1 into W0 right next to the significant bits of W0.
   2582         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
   2583                                 {W0, W1, WidthV, WidthV});
   2584         Words[IdxW ^ 1].push_back(T);
   2585       }
   2586       IdxW ^= 1;
   2587       Scale /= 2;
   2588     }
   2589 
   2590     // Another sanity check. At this point there should only be two words
   2591     // left, and Scale should be 2.
   2592     assert(Scale == 2 && Words[IdxW].size() == 2);
   2593 
   2594     SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2595                              Words[IdxW][1], Words[IdxW][0]);
   2596     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
   2597   }
   2598 
   2599   return SDValue();
   2600 }
   2601 
   2602 SDValue
   2603 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
   2604                                                SelectionDAG &DAG) const {
   2605   SDValue Vec = Op.getOperand(0);
   2606   MVT ElemTy = ty(Vec).getVectorElementType();
   2607   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
   2608 }
   2609 
   2610 SDValue
   2611 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   2612                                               SelectionDAG &DAG) const {
   2613   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
   2614                        ty(Op), ty(Op), DAG);
   2615 }
   2616 
   2617 SDValue
   2618 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   2619                                               SelectionDAG &DAG) const {
   2620   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
   2621                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
   2622 }
   2623 
   2624 SDValue
   2625 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
   2626                                              SelectionDAG &DAG) const {
   2627   SDValue ValV = Op.getOperand(1);
   2628   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
   2629                       SDLoc(Op), ty(ValV), DAG);
   2630 }
   2631 
   2632 bool
   2633 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
   2634   // Assuming the caller does not have either a signext or zeroext modifier, and
   2635   // only one value is accepted, any reasonable truncation is allowed.
   2636   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
   2637     return false;
   2638 
   2639   // FIXME: in principle up to 64-bit could be made safe, but it would be very
   2640   // fragile at the moment: any support for multiple value returns would be
   2641   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
   2642   return Ty1->getPrimitiveSizeInBits() <= 32;
   2643 }
   2644 
   2645 SDValue
   2646 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
   2647       const {
   2648   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
   2649   unsigned HaveAlign = LN->getAlignment();
   2650   MVT LoadTy = ty(Op);
   2651   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
   2652   if (HaveAlign >= NeedAlign)
   2653     return Op;
   2654 
   2655   const SDLoc &dl(Op);
   2656   const DataLayout &DL = DAG.getDataLayout();
   2657   LLVMContext &Ctx = *DAG.getContext();
   2658   unsigned AS = LN->getAddressSpace();
   2659 
   2660   // If the load aligning is disabled or the load can be broken up into two
   2661   // smaller legal loads, do the default (target-independent) expansion.
   2662   bool DoDefault = false;
   2663   // Handle it in the default way if this is an indexed load.
   2664   if (!LN->isUnindexed())
   2665     DoDefault = true;
   2666 
   2667   if (!AlignLoads) {
   2668     if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
   2669       return Op;
   2670     DoDefault = true;
   2671   }
   2672   if (!DoDefault && 2*HaveAlign == NeedAlign) {
   2673     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
   2674     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
   2675                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
   2676     DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
   2677   }
   2678   if (DoDefault) {
   2679     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
   2680     return DAG.getMergeValues({P.first, P.second}, dl);
   2681   }
   2682 
   2683   // The code below generates two loads, both aligned as NeedAlign, and
   2684   // with the distance of NeedAlign between them. For that to cover the
   2685   // bits that need to be loaded (and without overlapping), the size of
   2686   // the loads should be equal to NeedAlign. This is true for all loadable
   2687   // types, but add an assertion in case something changes in the future.
   2688   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
   2689 
   2690   unsigned LoadLen = NeedAlign;
   2691   SDValue Base = LN->getBasePtr();
   2692   SDValue Chain = LN->getChain();
   2693   auto BO = getBaseAndOffset(Base);
   2694   unsigned BaseOpc = BO.first.getOpcode();
   2695   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
   2696     return Op;
   2697 
   2698   if (BO.second % LoadLen != 0) {
   2699     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
   2700                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
   2701     BO.second -= BO.second % LoadLen;
   2702   }
   2703   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
   2704       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
   2705                     DAG.getConstant(NeedAlign, dl, MVT::i32))
   2706       : BO.first;
   2707   SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
   2708   SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
   2709 
   2710   MachineMemOperand *WideMMO = nullptr;
   2711   if (MachineMemOperand *MMO = LN->getMemOperand()) {
   2712     MachineFunction &MF = DAG.getMachineFunction();
   2713     WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
   2714                     2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
   2715                     MMO->getSyncScopeID(), MMO->getOrdering(),
   2716                     MMO->getFailureOrdering());
   2717   }
   2718 
   2719   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
   2720   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
   2721 
   2722   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
   2723                                 {Load1, Load0, BaseNoOff.getOperand(0)});
   2724   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2725                                  Load0.getValue(1), Load1.getValue(1));
   2726   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
   2727   return M;
   2728 }
   2729 
   2730 SDValue
   2731 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
   2732   const SDLoc &dl(Op);
   2733   unsigned Opc = Op.getOpcode();
   2734   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
   2735 
   2736   if (Opc == ISD::ADDCARRY)
   2737     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
   2738                        { X, Y, C });
   2739 
   2740   EVT CarryTy = C.getValueType();
   2741   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
   2742                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
   2743   SDValue Out[] = { SubC.getValue(0),
   2744                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
   2745   return DAG.getMergeValues(Out, dl);
   2746 }
   2747 
   2748 SDValue
   2749 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   2750   SDValue Chain     = Op.getOperand(0);
   2751   SDValue Offset    = Op.getOperand(1);
   2752   SDValue Handler   = Op.getOperand(2);
   2753   SDLoc dl(Op);
   2754   auto PtrVT = getPointerTy(DAG.getDataLayout());
   2755 
   2756   // Mark function as containing a call to EH_RETURN.
   2757   HexagonMachineFunctionInfo *FuncInfo =
   2758     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
   2759   FuncInfo->setHasEHReturn();
   2760 
   2761   unsigned OffsetReg = Hexagon::R28;
   2762 
   2763   SDValue StoreAddr =
   2764       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
   2765                   DAG.getIntPtrConstant(4, dl));
   2766   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
   2767   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
   2768 
   2769   // Not needed we already use it as explict input to EH_RETURN.
   2770   // MF.getRegInfo().addLiveOut(OffsetReg);
   2771 
   2772   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
   2773 }
   2774 
   2775 SDValue
   2776 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   2777   unsigned Opc = Op.getOpcode();
   2778 
   2779   // Handle INLINEASM first.
   2780   if (Opc == ISD::INLINEASM)
   2781     return LowerINLINEASM(Op, DAG);
   2782 
   2783   if (isHvxOperation(Op)) {
   2784     // If HVX lowering returns nothing, try the default lowering.
   2785     if (SDValue V = LowerHvxOperation(Op, DAG))
   2786       return V;
   2787   }
   2788 
   2789   switch (Opc) {
   2790     default:
   2791 #ifndef NDEBUG
   2792       Op.getNode()->dumpr(&DAG);
   2793       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
   2794         errs() << "Error: check for a non-legal type in this operation\n";
   2795 #endif
   2796       llvm_unreachable("Should not custom lower this!");
   2797     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
   2798     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
   2799     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
   2800     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
   2801     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   2802     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
   2803     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
   2804     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
   2805     case ISD::LOAD:                 return LowerUnalignedLoad(Op, DAG);
   2806     case ISD::ADDCARRY:
   2807     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
   2808     case ISD::SRA:
   2809     case ISD::SHL:
   2810     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
   2811     case ISD::ROTL:                 return LowerROTL(Op, DAG);
   2812     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
   2813     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
   2814     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
   2815     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
   2816     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
   2817     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
   2818     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
   2819     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
   2820     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
   2821     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   2822     case ISD::VASTART:              return LowerVASTART(Op, DAG);
   2823     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
   2824     case ISD::SETCC:                return LowerSETCC(Op, DAG);
   2825     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
   2826     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   2827     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
   2828     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
   2829     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
   2830       break;
   2831   }
   2832 
   2833   return SDValue();
   2834 }
   2835 
   2836 void
   2837 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
   2838                                           SmallVectorImpl<SDValue> &Results,
   2839                                           SelectionDAG &DAG) const {
   2840   const SDLoc &dl(N);
   2841   switch (N->getOpcode()) {
   2842     case ISD::SRL:
   2843     case ISD::SRA:
   2844     case ISD::SHL:
   2845       return;
   2846     case ISD::BITCAST:
   2847       // Handle a bitcast from v8i1 to i8.
   2848       if (N->getValueType(0) == MVT::i8) {
   2849         SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
   2850                              N->getOperand(0), DAG);
   2851         Results.push_back(P);
   2852       }
   2853       break;
   2854   }
   2855 }
   2856 
   2857 /// Returns relocation base for the given PIC jumptable.
   2858 SDValue
   2859 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
   2860                                                 SelectionDAG &DAG) const {
   2861   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
   2862   EVT VT = Table.getValueType();
   2863   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
   2864   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
   2865 }
   2866 
   2867 //===----------------------------------------------------------------------===//
   2868 // Inline Assembly Support
   2869 //===----------------------------------------------------------------------===//
   2870 
   2871 TargetLowering::ConstraintType
   2872 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
   2873   if (Constraint.size() == 1) {
   2874     switch (Constraint[0]) {
   2875       case 'q':
   2876       case 'v':
   2877         if (Subtarget.useHVXOps())
   2878           return C_RegisterClass;
   2879         break;
   2880       case 'a':
   2881         return C_RegisterClass;
   2882       default:
   2883         break;
   2884     }
   2885   }
   2886   return TargetLowering::getConstraintType(Constraint);
   2887 }
   2888 
   2889 std::pair<unsigned, const TargetRegisterClass*>
   2890 HexagonTargetLowering::getRegForInlineAsmConstraint(
   2891     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
   2892 
   2893   if (Constraint.size() == 1) {
   2894     switch (Constraint[0]) {
   2895     case 'r':   // R0-R31
   2896       switch (VT.SimpleTy) {
   2897       default:
   2898         return {0u, nullptr};
   2899       case MVT::i1:
   2900       case MVT::i8:
   2901       case MVT::i16:
   2902       case MVT::i32:
   2903       case MVT::f32:
   2904         return {0u, &Hexagon::IntRegsRegClass};
   2905       case MVT::i64:
   2906       case MVT::f64:
   2907         return {0u, &Hexagon::DoubleRegsRegClass};
   2908       }
   2909       break;
   2910     case 'a': // M0-M1
   2911       if (VT != MVT::i32)
   2912         return {0u, nullptr};
   2913       return {0u, &Hexagon::ModRegsRegClass};
   2914     case 'q': // q0-q3
   2915       switch (VT.getSizeInBits()) {
   2916       default:
   2917         return {0u, nullptr};
   2918       case 512:
   2919       case 1024:
   2920         return {0u, &Hexagon::HvxQRRegClass};
   2921       }
   2922       break;
   2923     case 'v': // V0-V31
   2924       switch (VT.getSizeInBits()) {
   2925       default:
   2926         return {0u, nullptr};
   2927       case 512:
   2928         return {0u, &Hexagon::HvxVRRegClass};
   2929       case 1024:
   2930         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
   2931           return {0u, &Hexagon::HvxVRRegClass};
   2932         return {0u, &Hexagon::HvxWRRegClass};
   2933       case 2048:
   2934         return {0u, &Hexagon::HvxWRRegClass};
   2935       }
   2936       break;
   2937     default:
   2938       return {0u, nullptr};
   2939     }
   2940   }
   2941 
   2942   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
   2943 }
   2944 
   2945 /// isFPImmLegal - Returns true if the target can instruction select the
   2946 /// specified FP immediate natively. If false, the legalizer will
   2947 /// materialize the FP immediate as a load from a constant pool.
   2948 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   2949   return Subtarget.hasV5Ops();
   2950 }
   2951 
   2952 /// isLegalAddressingMode - Return true if the addressing mode represented by
   2953 /// AM is legal for this target, for a load/store of the specified type.
   2954 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   2955                                                   const AddrMode &AM, Type *Ty,
   2956                                                   unsigned AS, Instruction *I) const {
   2957   if (Ty->isSized()) {
   2958     // When LSR detects uses of the same base address to access different
   2959     // types (e.g. unions), it will assume a conservative type for these
   2960     // uses:
   2961     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
   2962     // The type Ty passed here would then be "void". Skip the alignment
   2963     // checks, but do not return false right away, since that confuses
   2964     // LSR into crashing.
   2965     unsigned A = DL.getABITypeAlignment(Ty);
   2966     // The base offset must be a multiple of the alignment.
   2967     if ((AM.BaseOffs % A) != 0)
   2968       return false;
   2969     // The shifted offset must fit in 11 bits.
   2970     if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
   2971       return false;
   2972   }
   2973 
   2974   // No global is ever allowed as a base.
   2975   if (AM.BaseGV)
   2976     return false;
   2977 
   2978   int Scale = AM.Scale;
   2979   if (Scale < 0)
   2980     Scale = -Scale;
   2981   switch (Scale) {
   2982   case 0:  // No scale reg, "r+i", "r", or just "i".
   2983     break;
   2984   default: // No scaled addressing mode.
   2985     return false;
   2986   }
   2987   return true;
   2988 }
   2989 
   2990 /// Return true if folding a constant offset with the given GlobalAddress is
   2991 /// legal.  It is frequently not legal in PIC relocation models.
   2992 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
   2993       const {
   2994   return HTM.getRelocationModel() == Reloc::Static;
   2995 }
   2996 
   2997 /// isLegalICmpImmediate - Return true if the specified immediate is legal
   2998 /// icmp immediate, that is the target has icmp instructions which can compare
   2999 /// a register against the immediate without having to materialize the
   3000 /// immediate into a register.
   3001 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   3002   return Imm >= -512 && Imm <= 511;
   3003 }
   3004 
   3005 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   3006 /// for tail call optimization. Targets which want to do tail call
   3007 /// optimization should implement this function.
   3008 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
   3009                                  SDValue Callee,
   3010                                  CallingConv::ID CalleeCC,
   3011                                  bool IsVarArg,
   3012                                  bool IsCalleeStructRet,
   3013                                  bool IsCallerStructRet,
   3014                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
   3015                                  const SmallVectorImpl<SDValue> &OutVals,
   3016                                  const SmallVectorImpl<ISD::InputArg> &Ins,
   3017                                  SelectionDAG& DAG) const {
   3018   const Function &CallerF = DAG.getMachineFunction().getFunction();
   3019   CallingConv::ID CallerCC = CallerF.getCallingConv();
   3020   bool CCMatch = CallerCC == CalleeCC;
   3021 
   3022   // ***************************************************************************
   3023   //  Look for obvious safe cases to perform tail call optimization that do not
   3024   //  require ABI changes.
   3025   // ***************************************************************************
   3026 
   3027   // If this is a tail call via a function pointer, then don't do it!
   3028   if (!isa<GlobalAddressSDNode>(Callee) &&
   3029       !isa<ExternalSymbolSDNode>(Callee)) {
   3030     return false;
   3031   }
   3032 
   3033   // Do not optimize if the calling conventions do not match and the conventions
   3034   // used are not C or Fast.
   3035   if (!CCMatch) {
   3036     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
   3037     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
   3038     // If R & E, then ok.
   3039     if (!R || !E)
   3040       return false;
   3041   }
   3042 
   3043   // Do not tail call optimize vararg calls.
   3044   if (IsVarArg)
   3045     return false;
   3046 
   3047   // Also avoid tail call optimization if either caller or callee uses struct
   3048   // return semantics.
   3049   if (IsCalleeStructRet || IsCallerStructRet)
   3050     return false;
   3051 
   3052   // In addition to the cases above, we also disable Tail Call Optimization if
   3053   // the calling convention code that at least one outgoing argument needs to
   3054   // go on the stack. We cannot check that here because at this point that
   3055   // information is not available.
   3056   return true;
   3057 }
   3058 
   3059 /// Returns the target specific optimal type for load and store operations as
   3060 /// a result of memset, memcpy, and memmove lowering.
   3061 ///
   3062 /// If DstAlign is zero that means it's safe to destination alignment can
   3063 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
   3064 /// a need to check it against alignment requirement, probably because the
   3065 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
   3066 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
   3067 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
   3068 /// does not need to be loaded.  It returns EVT::Other if the type should be
   3069 /// determined using generic target-independent logic.
   3070 EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
   3071       unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
   3072       bool MemcpyStrSrc, MachineFunction &MF) const {
   3073 
   3074   auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
   3075     return (GivenA % MinA) == 0;
   3076   };
   3077 
   3078   if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
   3079     return MVT::i64;
   3080   if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
   3081     return MVT::i32;
   3082   if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
   3083     return MVT::i16;
   3084 
   3085   return MVT::Other;
   3086 }
   3087 
   3088 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   3089       unsigned AS, unsigned Align, bool *Fast) const {
   3090   if (Fast)
   3091     *Fast = false;
   3092   return Subtarget.isHVXVectorType(VT.getSimpleVT());
   3093 }
   3094 
   3095 std::pair<const TargetRegisterClass*, uint8_t>
   3096 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   3097       MVT VT) const {
   3098   if (Subtarget.isHVXVectorType(VT, true)) {
   3099     unsigned BitWidth = VT.getSizeInBits();
   3100     unsigned VecWidth = Subtarget.getVectorLength() * 8;
   3101 
   3102     if (VT.getVectorElementType() == MVT::i1)
   3103       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
   3104     if (BitWidth == VecWidth)
   3105       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
   3106     assert(BitWidth == 2 * VecWidth);
   3107     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
   3108   }
   3109 
   3110   return TargetLowering::findRepresentativeClass(TRI, VT);
   3111 }
   3112 
   3113 Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
   3114       AtomicOrdering Ord) const {
   3115   BasicBlock *BB = Builder.GetInsertBlock();
   3116   Module *M = BB->getParent()->getParent();
   3117   Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
   3118   unsigned SZ = Ty->getPrimitiveSizeInBits();
   3119   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
   3120   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
   3121                                    : Intrinsic::hexagon_L4_loadd_locked;
   3122   Value *Fn = Intrinsic::getDeclaration(M, IntID);
   3123   return Builder.CreateCall(Fn, Addr, "larx");
   3124 }
   3125 
   3126 /// Perform a store-conditional operation to Addr. Return the status of the
   3127 /// store. This should be 0 if the store succeeded, non-zero otherwise.
   3128 Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
   3129       Value *Val, Value *Addr, AtomicOrdering Ord) const {
   3130   BasicBlock *BB = Builder.GetInsertBlock();
   3131   Module *M = BB->getParent()->getParent();
   3132   Type *Ty = Val->getType();
   3133   unsigned SZ = Ty->getPrimitiveSizeInBits();
   3134   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
   3135   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
   3136                                    : Intrinsic::hexagon_S4_stored_locked;
   3137   Value *Fn = Intrinsic::getDeclaration(M, IntID);
   3138   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
   3139   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
   3140   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
   3141   return Ext;
   3142 }
   3143 
   3144 TargetLowering::AtomicExpansionKind
   3145 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   3146   // Do not expand loads and stores that don't exceed 64 bits.
   3147   return LI->getType()->getPrimitiveSizeInBits() > 64
   3148              ? AtomicExpansionKind::LLOnly
   3149              : AtomicExpansionKind::None;
   3150 }
   3151 
   3152 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   3153   // Do not expand loads and stores that don't exceed 64 bits.
   3154   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
   3155 }
   3156 
   3157 bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
   3158       AtomicCmpXchgInst *AI) const {
   3159   const DataLayout &DL = AI->getModule()->getDataLayout();
   3160   unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
   3161   return Size >= 4 && Size <= 8;
   3162 }
   3163