1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ARMSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARMTargetMachine.h" 15 #include "llvm/CodeGen/SelectionDAG.h" 16 #include "llvm/IR/DerivedTypes.h" 17 using namespace llvm; 18 19 #define DEBUG_TYPE "arm-selectiondag-info" 20 21 // Emit, if possible, a specialized version of the given Libcall. Typically this 22 // means selecting the appropriately aligned version, but we also convert memset 23 // of 0 into memclr. 24 SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall( 25 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 26 SDValue Size, unsigned Align, RTLIB::Libcall LC) const { 27 const ARMSubtarget &Subtarget = 28 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 29 const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); 30 31 // Only use a specialized AEABI function if the default version of this 32 // Libcall is an AEABI function. 33 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) 34 return SDValue(); 35 36 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be 37 // able to translate memset to memclr and use the value to index the function 38 // name array. 39 enum { 40 AEABI_MEMCPY = 0, 41 AEABI_MEMMOVE, 42 AEABI_MEMSET, 43 AEABI_MEMCLR 44 } AEABILibcall; 45 switch (LC) { 46 case RTLIB::MEMCPY: 47 AEABILibcall = AEABI_MEMCPY; 48 break; 49 case RTLIB::MEMMOVE: 50 AEABILibcall = AEABI_MEMMOVE; 51 break; 52 case RTLIB::MEMSET: 53 AEABILibcall = AEABI_MEMSET; 54 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) 55 if (ConstantSrc->getZExtValue() == 0) 56 AEABILibcall = AEABI_MEMCLR; 57 break; 58 default: 59 return SDValue(); 60 } 61 62 // Choose the most-aligned libcall variant that we can 63 enum { 64 ALIGN1 = 0, 65 ALIGN4, 66 ALIGN8 67 } AlignVariant; 68 if ((Align & 7) == 0) 69 AlignVariant = ALIGN8; 70 else if ((Align & 3) == 0) 71 AlignVariant = ALIGN4; 72 else 73 AlignVariant = ALIGN1; 74 75 TargetLowering::ArgListTy Args; 76 TargetLowering::ArgListEntry Entry; 77 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 78 Entry.Node = Dst; 79 Args.push_back(Entry); 80 if (AEABILibcall == AEABI_MEMCLR) { 81 Entry.Node = Size; 82 Args.push_back(Entry); 83 } else if (AEABILibcall == AEABI_MEMSET) { 84 // Adjust parameters for memset, EABI uses format (ptr, size, value), 85 // GNU library uses (ptr, value, size) 86 // See RTABI section 4.3.4 87 Entry.Node = Size; 88 Args.push_back(Entry); 89 90 // Extend or truncate the argument to be an i32 value for the call. 91 if (Src.getValueType().bitsGT(MVT::i32)) 92 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); 93 else if (Src.getValueType().bitsLT(MVT::i32)) 94 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); 95 96 Entry.Node = Src; 97 Entry.Ty = Type::getInt32Ty(*DAG.getContext()); 98 Entry.isSExt = false; 99 Args.push_back(Entry); 100 } else { 101 Entry.Node = Src; 102 Args.push_back(Entry); 103 104 Entry.Node = Size; 105 Args.push_back(Entry); 106 } 107 108 char const *FunctionNames[4][3] = { 109 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, 110 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, 111 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, 112 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } 113 }; 114 TargetLowering::CallLoweringInfo CLI(DAG); 115 CLI.setDebugLoc(dl) 116 .setChain(Chain) 117 .setCallee( 118 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), 119 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], 120 TLI->getPointerTy(DAG.getDataLayout())), 121 std::move(Args)) 122 .setDiscardResult(); 123 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 124 125 return CallResult.second; 126 } 127 128 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( 129 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 130 SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, 131 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { 132 const ARMSubtarget &Subtarget = 133 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 134 // Do repeated 4-byte loads and stores. To be improved. 135 // This requires 4-byte alignment. 136 if ((Align & 3) != 0) 137 return SDValue(); 138 // This requires the copy size to be a constant, preferably 139 // within a subtarget-specific limit. 140 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 141 if (!ConstantSize) 142 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 143 RTLIB::MEMCPY); 144 uint64_t SizeVal = ConstantSize->getZExtValue(); 145 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 146 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 147 RTLIB::MEMCPY); 148 149 unsigned BytesLeft = SizeVal & 3; 150 unsigned NumMemOps = SizeVal >> 2; 151 unsigned EmittedNumMemOps = 0; 152 EVT VT = MVT::i32; 153 unsigned VTSize = 4; 154 unsigned i = 0; 155 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers 156 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; 157 SDValue TFOps[6]; 158 SDValue Loads[6]; 159 uint64_t SrcOff = 0, DstOff = 0; 160 161 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to 162 // VLDM/VSTM and make this code emit it when appropriate. This would reduce 163 // pressure on the general purpose registers. However this seems harder to map 164 // onto the register allocator's view of the world. 165 166 // The number of MEMCPY pseudo-instructions to emit. We use up to 167 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm 168 // later on. This is a lower bound on the number of MEMCPY operations we must 169 // emit. 170 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; 171 172 // Code size optimisation: do not inline memcpy if expansion results in 173 // more instructions than the libary call. 174 if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { 175 return SDValue(); 176 } 177 178 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); 179 180 for (unsigned I = 0; I != NumMEMCPYs; ++I) { 181 // Evenly distribute registers among MEMCPY operations to reduce register 182 // pressure. 183 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; 184 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; 185 186 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, 187 DAG.getConstant(NumRegs, dl, MVT::i32)); 188 Src = Dst.getValue(1); 189 Chain = Dst.getValue(2); 190 191 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); 192 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); 193 194 EmittedNumMemOps = NextEmittedNumMemOps; 195 } 196 197 if (BytesLeft == 0) 198 return Chain; 199 200 // Issue loads / stores for the trailing (1 - 3) bytes. 201 unsigned BytesLeftSave = BytesLeft; 202 i = 0; 203 while (BytesLeft) { 204 if (BytesLeft >= 2) { 205 VT = MVT::i16; 206 VTSize = 2; 207 } else { 208 VT = MVT::i8; 209 VTSize = 1; 210 } 211 212 Loads[i] = DAG.getLoad(VT, dl, Chain, 213 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 214 DAG.getConstant(SrcOff, dl, MVT::i32)), 215 SrcPtrInfo.getWithOffset(SrcOff), 216 false, false, false, 0); 217 TFOps[i] = Loads[i].getValue(1); 218 ++i; 219 SrcOff += VTSize; 220 BytesLeft -= VTSize; 221 } 222 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 223 makeArrayRef(TFOps, i)); 224 225 i = 0; 226 BytesLeft = BytesLeftSave; 227 while (BytesLeft) { 228 if (BytesLeft >= 2) { 229 VT = MVT::i16; 230 VTSize = 2; 231 } else { 232 VT = MVT::i8; 233 VTSize = 1; 234 } 235 236 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 237 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 238 DAG.getConstant(DstOff, dl, MVT::i32)), 239 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 240 ++i; 241 DstOff += VTSize; 242 BytesLeft -= VTSize; 243 } 244 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 245 makeArrayRef(TFOps, i)); 246 } 247 248 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove( 249 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 250 SDValue Size, unsigned Align, bool isVolatile, 251 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { 252 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 253 RTLIB::MEMMOVE); 254 } 255 256 SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset( 257 SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, 258 SDValue Size, unsigned Align, bool isVolatile, 259 MachinePointerInfo DstPtrInfo) const { 260 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 261 RTLIB::MEMSET); 262 } 263