Home | History | Annotate | Download | only in ARM
      1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the ARMSelectionDAGInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
#include <cstring>
     17 using namespace llvm;
     18 
     19 #define DEBUG_TYPE "arm-selectiondag-info"
     20 
     21 // Emit, if possible, a specialized version of the given Libcall. Typically this
     22 // means selecting the appropriately aligned version, but we also convert memset
     23 // of 0 into memclr.
     24 SDValue ARMSelectionDAGInfo::
     25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl,
     26                        SDValue Chain,
     27                        SDValue Dst, SDValue Src,
     28                        SDValue Size, unsigned Align,
     29                        RTLIB::Libcall LC) const {
     30   const ARMSubtarget &Subtarget =
     31       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
     32   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
     33 
     34   // Only use a specialized AEABI function if the default version of this
     35   // Libcall is an AEABI function.
     36   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
     37     return SDValue();
     38 
     39   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
     40   // able to translate memset to memclr and use the value to index the function
     41   // name array.
     42   enum {
     43     AEABI_MEMCPY = 0,
     44     AEABI_MEMMOVE,
     45     AEABI_MEMSET,
     46     AEABI_MEMCLR
     47   } AEABILibcall;
     48   switch (LC) {
     49   case RTLIB::MEMCPY:
     50     AEABILibcall = AEABI_MEMCPY;
     51     break;
     52   case RTLIB::MEMMOVE:
     53     AEABILibcall = AEABI_MEMMOVE;
     54     break;
     55   case RTLIB::MEMSET:
     56     AEABILibcall = AEABI_MEMSET;
     57     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
     58       if (ConstantSrc->getZExtValue() == 0)
     59         AEABILibcall = AEABI_MEMCLR;
     60     break;
     61   default:
     62     return SDValue();
     63   }
     64 
     65   // Choose the most-aligned libcall variant that we can
     66   enum {
     67     ALIGN1 = 0,
     68     ALIGN4,
     69     ALIGN8
     70   } AlignVariant;
     71   if ((Align & 7) == 0)
     72     AlignVariant = ALIGN8;
     73   else if ((Align & 3) == 0)
     74     AlignVariant = ALIGN4;
     75   else
     76     AlignVariant = ALIGN1;
     77 
     78   TargetLowering::ArgListTy Args;
     79   TargetLowering::ArgListEntry Entry;
     80   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
     81   Entry.Node = Dst;
     82   Args.push_back(Entry);
     83   if (AEABILibcall == AEABI_MEMCLR) {
     84     Entry.Node = Size;
     85     Args.push_back(Entry);
     86   } else if (AEABILibcall == AEABI_MEMSET) {
     87     // Adjust parameters for memset, EABI uses format (ptr, size, value),
     88     // GNU library uses (ptr, value, size)
     89     // See RTABI section 4.3.4
     90     Entry.Node = Size;
     91     Args.push_back(Entry);
     92 
     93     // Extend or truncate the argument to be an i32 value for the call.
     94     if (Src.getValueType().bitsGT(MVT::i32))
     95       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
     96     else if (Src.getValueType().bitsLT(MVT::i32))
     97       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
     98 
     99     Entry.Node = Src;
    100     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
    101     Entry.isSExt = false;
    102     Args.push_back(Entry);
    103   } else {
    104     Entry.Node = Src;
    105     Args.push_back(Entry);
    106 
    107     Entry.Node = Size;
    108     Args.push_back(Entry);
    109   }
    110 
    111   char const *FunctionNames[4][3] = {
    112     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
    113     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
    114     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
    115     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
    116   };
    117   TargetLowering::CallLoweringInfo CLI(DAG);
    118   CLI.setDebugLoc(dl)
    119       .setChain(Chain)
    120       .setCallee(
    121            TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
    122            DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
    123                                  TLI->getPointerTy(DAG.getDataLayout())),
    124            std::move(Args), 0)
    125       .setDiscardResult();
    126   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
    127 
    128   return CallResult.second;
    129 }
    130 
    131 SDValue
    132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
    133                                              SDValue Chain,
    134                                              SDValue Dst, SDValue Src,
    135                                              SDValue Size, unsigned Align,
    136                                              bool isVolatile, bool AlwaysInline,
    137                                              MachinePointerInfo DstPtrInfo,
    138                                           MachinePointerInfo SrcPtrInfo) const {
    139   const ARMSubtarget &Subtarget =
    140       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
    141   // Do repeated 4-byte loads and stores. To be improved.
    142   // This requires 4-byte alignment.
    143   if ((Align & 3) != 0)
    144     return SDValue();
    145   // This requires the copy size to be a constant, preferably
    146   // within a subtarget-specific limit.
    147   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
    148   if (!ConstantSize)
    149     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    150                                   RTLIB::MEMCPY);
    151   uint64_t SizeVal = ConstantSize->getZExtValue();
    152   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
    153     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    154                                   RTLIB::MEMCPY);
    155 
    156   unsigned BytesLeft = SizeVal & 3;
    157   unsigned NumMemOps = SizeVal >> 2;
    158   unsigned EmittedNumMemOps = 0;
    159   EVT VT = MVT::i32;
    160   unsigned VTSize = 4;
    161   unsigned i = 0;
    162   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers
    163   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
    164   SDValue TFOps[6];
    165   SDValue Loads[6];
    166   uint64_t SrcOff = 0, DstOff = 0;
    167 
    168   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
    169   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
    170   // pressure on the general purpose registers. However this seems harder to map
    171   // onto the register allocator's view of the world.
    172 
    173   // The number of MEMCPY pseudo-instructions to emit. We use up to
    174   // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm
    175   // later on. This is a lower bound on the number of MEMCPY operations we must
    176   // emit.
    177   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
    178 
    179   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
    180 
    181   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
    182     // Evenly distribute registers among MEMCPY operations to reduce register
    183     // pressure.
    184     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
    185     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
    186 
    187     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
    188                       DAG.getConstant(NumRegs, dl, MVT::i32));
    189     Src = Dst.getValue(1);
    190     Chain = Dst.getValue(2);
    191 
    192     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
    193     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
    194 
    195     EmittedNumMemOps = NextEmittedNumMemOps;
    196   }
    197 
    198   if (BytesLeft == 0)
    199     return Chain;
    200 
    201   // Issue loads / stores for the trailing (1 - 3) bytes.
    202   unsigned BytesLeftSave = BytesLeft;
    203   i = 0;
    204   while (BytesLeft) {
    205     if (BytesLeft >= 2) {
    206       VT = MVT::i16;
    207       VTSize = 2;
    208     } else {
    209       VT = MVT::i8;
    210       VTSize = 1;
    211     }
    212 
    213     Loads[i] = DAG.getLoad(VT, dl, Chain,
    214                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
    215                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
    216                            SrcPtrInfo.getWithOffset(SrcOff),
    217                            false, false, false, 0);
    218     TFOps[i] = Loads[i].getValue(1);
    219     ++i;
    220     SrcOff += VTSize;
    221     BytesLeft -= VTSize;
    222   }
    223   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
    224                       makeArrayRef(TFOps, i));
    225 
    226   i = 0;
    227   BytesLeft = BytesLeftSave;
    228   while (BytesLeft) {
    229     if (BytesLeft >= 2) {
    230       VT = MVT::i16;
    231       VTSize = 2;
    232     } else {
    233       VT = MVT::i8;
    234       VTSize = 1;
    235     }
    236 
    237     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
    238                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
    239                                         DAG.getConstant(DstOff, dl, MVT::i32)),
    240                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
    241     ++i;
    242     DstOff += VTSize;
    243     BytesLeft -= VTSize;
    244   }
    245   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
    246                      makeArrayRef(TFOps, i));
    247 }
    248 
    249 
    250 SDValue ARMSelectionDAGInfo::
    251 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl,
    252                          SDValue Chain,
    253                          SDValue Dst, SDValue Src,
    254                          SDValue Size, unsigned Align,
    255                          bool isVolatile,
    256                          MachinePointerInfo DstPtrInfo,
    257                          MachinePointerInfo SrcPtrInfo) const {
    258   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    259                                 RTLIB::MEMMOVE);
    260 }
    261 
    262 
    263 SDValue ARMSelectionDAGInfo::
    264 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
    265                         SDValue Chain, SDValue Dst,
    266                         SDValue Src, SDValue Size,
    267                         unsigned Align, bool isVolatile,
    268                         MachinePointerInfo DstPtrInfo) const {
    269   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
    270                                 RTLIB::MEMSET);
    271 }
    272