Home | History | Annotate | Download | only in ARM
      1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the ARMSelectionDAGInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "ARMTargetMachine.h"
     15 #include "llvm/CodeGen/SelectionDAG.h"
     16 #include "llvm/IR/DerivedTypes.h"
     17 using namespace llvm;
     18 
     19 #define DEBUG_TYPE "arm-selectiondag-info"
     20 
     21 // Emit, if possible, a specialized version of the given Libcall. Typically this
     22 // means selecting the appropriately aligned version, but we also convert memset
     23 // of 0 into memclr.
     24 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
     25     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     26     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
     27   const ARMSubtarget &Subtarget =
     28       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
     29   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
     30 
     31   // Only use a specialized AEABI function if the default version of this
     32   // Libcall is an AEABI function.
     33   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
     34     return SDValue();
     35 
     36   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
     37   // able to translate memset to memclr and use the value to index the function
     38   // name array.
     39   enum {
     40     AEABI_MEMCPY = 0,
     41     AEABI_MEMMOVE,
     42     AEABI_MEMSET,
     43     AEABI_MEMCLR
     44   } AEABILibcall;
     45   switch (LC) {
     46   case RTLIB::MEMCPY:
     47     AEABILibcall = AEABI_MEMCPY;
     48     break;
     49   case RTLIB::MEMMOVE:
     50     AEABILibcall = AEABI_MEMMOVE;
     51     break;
     52   case RTLIB::MEMSET:
     53     AEABILibcall = AEABI_MEMSET;
     54     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
     55       if (ConstantSrc->getZExtValue() == 0)
     56         AEABILibcall = AEABI_MEMCLR;
     57     break;
     58   default:
     59     return SDValue();
     60   }
     61 
     62   // Choose the most-aligned libcall variant that we can
     63   enum {
     64     ALIGN1 = 0,
     65     ALIGN4,
     66     ALIGN8
     67   } AlignVariant;
     68   if ((Align & 7) == 0)
     69     AlignVariant = ALIGN8;
     70   else if ((Align & 3) == 0)
     71     AlignVariant = ALIGN4;
     72   else
     73     AlignVariant = ALIGN1;
     74 
     75   TargetLowering::ArgListTy Args;
     76   TargetLowering::ArgListEntry Entry;
     77   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
     78   Entry.Node = Dst;
     79   Args.push_back(Entry);
     80   if (AEABILibcall == AEABI_MEMCLR) {
     81     Entry.Node = Size;
     82     Args.push_back(Entry);
     83   } else if (AEABILibcall == AEABI_MEMSET) {
     84     // Adjust parameters for memset, EABI uses format (ptr, size, value),
     85     // GNU library uses (ptr, value, size)
     86     // See RTABI section 4.3.4
     87     Entry.Node = Size;
     88     Args.push_back(Entry);
     89 
     90     // Extend or truncate the argument to be an i32 value for the call.
     91     if (Src.getValueType().bitsGT(MVT::i32))
     92       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
     93     else if (Src.getValueType().bitsLT(MVT::i32))
     94       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
     95 
     96     Entry.Node = Src;
     97     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
     98     Entry.isSExt = false;
     99     Args.push_back(Entry);
    100   } else {
    101     Entry.Node = Src;
    102     Args.push_back(Entry);
    103 
    104     Entry.Node = Size;
    105     Args.push_back(Entry);
    106   }
    107 
    108   char const *FunctionNames[4][3] = {
    109     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
    110     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
    111     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
    112     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
    113   };
    114   TargetLowering::CallLoweringInfo CLI(DAG);
    115   CLI.setDebugLoc(dl)
    116       .setChain(Chain)
    117       .setCallee(
    118            TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
    119            DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
    120                                  TLI->getPointerTy(DAG.getDataLayout())),
    121            std::move(Args))
    122       .setDiscardResult();
    123   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
    124 
    125   return CallResult.second;
    126 }
    127 
    128 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
    129     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    130     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    131     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
    132   const ARMSubtarget &Subtarget =
    133       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
    134   // Do repeated 4-byte loads and stores. To be improved.
    135   // This requires 4-byte alignment.
    136   if ((Align & 3) != 0)
    137     return SDValue();
    138   // This requires the copy size to be a constant, preferably
    139   // within a subtarget-specific limit.
    140   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    141   if (!ConstantSize)
    142     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    143                                   RTLIB::MEMCPY);
    144   uint64_t SizeVal = ConstantSize->getZExtValue();
    145   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
    146     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    147                                   RTLIB::MEMCPY);
    148 
    149   unsigned BytesLeft = SizeVal & 3;
    150   unsigned NumMemOps = SizeVal >> 2;
    151   unsigned EmittedNumMemOps = 0;
    152   EVT VT = MVT::i32;
    153   unsigned VTSize = 4;
    154   unsigned i = 0;
    155   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
    156   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
    157   SDValue TFOps[6];
    158   SDValue Loads[6];
    159   uint64_t SrcOff = 0, DstOff = 0;
    160 
    161   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
    162   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
    163   // pressure on the general purpose registers. However this seems harder to map
    164   // onto the register allocator's view of the world.
    165 
    166   // The number of MEMCPY pseudo-instructions to emit. We use up to
    167   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
    168   // later on. This is a lower bound on the number of MEMCPY operations we must
    169   // emit.
    170   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
    171 
    172   // Code size optimisation: do not inline memcpy if expansion results in
    173   // more instructions than the libary call.
    174   if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) {
    175     return SDValue();
    176   }
    177 
    178   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
    179 
    180   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    181     // Evenly distribute registers among MEMCPY operations to reduce register
    182     // pressure.
    183     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
    184     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
    185 
    186     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
    187                       DAG.getConstant(NumRegs, dl, MVT::i32));
    188     Src = Dst.getValue(1);
    189     Chain = Dst.getValue(2);
    190 
    191     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
    192     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
    193 
    194     EmittedNumMemOps = NextEmittedNumMemOps;
    195   }
    196 
    197   if (BytesLeft == 0)
    198     return Chain;
    199 
    200   // Issue loads / stores for the trailing (1 - 3) bytes.
    201   unsigned BytesLeftSave = BytesLeft;
    202   i = 0;
    203   while (BytesLeft) {
    204     if (BytesLeft >= 2) {
    205       VT = MVT::i16;
    206       VTSize = 2;
    207     } else {
    208       VT = MVT::i8;
    209       VTSize = 1;
    210     }
    211 
    212     Loads[i] = DAG.getLoad(VT, dl, Chain,
    213                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
    214                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
    215                            SrcPtrInfo.getWithOffset(SrcOff),
    216                            false, false, false, 0);
    217     TFOps[i] = Loads[i].getValue(1);
    218     ++i;
    219     SrcOff += VTSize;
    220     BytesLeft -= VTSize;
    221   }
    222   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
    223                       makeArrayRef(TFOps, i));
    224 
    225   i = 0;
    226   BytesLeft = BytesLeftSave;
    227   while (BytesLeft) {
    228     if (BytesLeft >= 2) {
    229       VT = MVT::i16;
    230       VTSize = 2;
    231     } else {
    232       VT = MVT::i8;
    233       VTSize = 1;
    234     }
    235 
    236     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
    237                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
    238                                         DAG.getConstant(DstOff, dl, MVT::i32)),
    239                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
    240     ++i;
    241     DstOff += VTSize;
    242     BytesLeft -= VTSize;
    243   }
    244   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
    245                      makeArrayRef(TFOps, i));
    246 }
    247 
    248 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
    249     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    250     SDValue Size, unsigned Align, bool isVolatile,
    251     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
    252   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    253                                 RTLIB::MEMMOVE);
    254 }
    255 
    256 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
    257     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    258     SDValue Size, unsigned Align, bool isVolatile,
    259     MachinePointerInfo DstPtrInfo) const {
    260   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    261                                 RTLIB::MEMSET);
    262 }
    263