1 //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the ARMSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARMTargetMachine.h" 15 #include "llvm/CodeGen/SelectionDAG.h" 16 #include "llvm/IR/DerivedTypes.h" 17 using namespace llvm; 18 19 #define DEBUG_TYPE "arm-selectiondag-info" 20 21 // Emit, if possible, a specialized version of the given Libcall. Typically this 22 // means selecting the appropriately aligned version, but we also convert memset 23 // of 0 into memclr. 24 SDValue ARMSelectionDAGInfo:: 25 EmitSpecializedLibcall(SelectionDAG &DAG, SDLoc dl, 26 SDValue Chain, 27 SDValue Dst, SDValue Src, 28 SDValue Size, unsigned Align, 29 RTLIB::Libcall LC) const { 30 const ARMSubtarget &Subtarget = 31 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 32 const ARMTargetLowering *TLI = Subtarget.getTargetLowering(); 33 34 // Only use a specialized AEABI function if the default version of this 35 // Libcall is an AEABI function. 36 if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0) 37 return SDValue(); 38 39 // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be 40 // able to translate memset to memclr and use the value to index the function 41 // name array. 42 enum { 43 AEABI_MEMCPY = 0, 44 AEABI_MEMMOVE, 45 AEABI_MEMSET, 46 AEABI_MEMCLR 47 } AEABILibcall; 48 switch (LC) { 49 case RTLIB::MEMCPY: 50 AEABILibcall = AEABI_MEMCPY; 51 break; 52 case RTLIB::MEMMOVE: 53 AEABILibcall = AEABI_MEMMOVE; 54 break; 55 case RTLIB::MEMSET: 56 AEABILibcall = AEABI_MEMSET; 57 if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src)) 58 if (ConstantSrc->getZExtValue() == 0) 59 AEABILibcall = AEABI_MEMCLR; 60 break; 61 default: 62 return SDValue(); 63 } 64 65 // Choose the most-aligned libcall variant that we can 66 enum { 67 ALIGN1 = 0, 68 ALIGN4, 69 ALIGN8 70 } AlignVariant; 71 if ((Align & 7) == 0) 72 AlignVariant = ALIGN8; 73 else if ((Align & 3) == 0) 74 AlignVariant = ALIGN4; 75 else 76 AlignVariant = ALIGN1; 77 78 TargetLowering::ArgListTy Args; 79 TargetLowering::ArgListEntry Entry; 80 Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); 81 Entry.Node = Dst; 82 Args.push_back(Entry); 83 if (AEABILibcall == AEABI_MEMCLR) { 84 Entry.Node = Size; 85 Args.push_back(Entry); 86 } else if (AEABILibcall == AEABI_MEMSET) { 87 // Adjust parameters for memset, EABI uses format (ptr, size, value), 88 // GNU library uses (ptr, value, size) 89 // See RTABI section 4.3.4 90 Entry.Node = Size; 91 Args.push_back(Entry); 92 93 // Extend or truncate the argument to be an i32 value for the call. 94 if (Src.getValueType().bitsGT(MVT::i32)) 95 Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); 96 else if (Src.getValueType().bitsLT(MVT::i32)) 97 Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); 98 99 Entry.Node = Src; 100 Entry.Ty = Type::getInt32Ty(*DAG.getContext()); 101 Entry.isSExt = false; 102 Args.push_back(Entry); 103 } else { 104 Entry.Node = Src; 105 Args.push_back(Entry); 106 107 Entry.Node = Size; 108 Args.push_back(Entry); 109 } 110 111 char const *FunctionNames[4][3] = { 112 { "__aeabi_memcpy", "__aeabi_memcpy4", "__aeabi_memcpy8" }, 113 { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" }, 114 { "__aeabi_memset", "__aeabi_memset4", "__aeabi_memset8" }, 115 { "__aeabi_memclr", "__aeabi_memclr4", "__aeabi_memclr8" } 116 }; 117 TargetLowering::CallLoweringInfo CLI(DAG); 118 CLI.setDebugLoc(dl) 119 .setChain(Chain) 120 .setCallee( 121 TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()), 122 DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant], 123 TLI->getPointerTy(DAG.getDataLayout())), 124 std::move(Args), 0) 125 .setDiscardResult(); 126 std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); 127 128 return CallResult.second; 129 } 130 131 SDValue 132 ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, 133 SDValue Chain, 134 SDValue Dst, SDValue Src, 135 SDValue Size, unsigned Align, 136 bool isVolatile, bool AlwaysInline, 137 MachinePointerInfo DstPtrInfo, 138 MachinePointerInfo SrcPtrInfo) const { 139 const ARMSubtarget &Subtarget = 140 DAG.getMachineFunction().getSubtarget<ARMSubtarget>(); 141 // Do repeated 4-byte loads and stores. To be improved. 142 // This requires 4-byte alignment. 143 if ((Align & 3) != 0) 144 return SDValue(); 145 // This requires the copy size to be a constant, preferably 146 // within a subtarget-specific limit. 147 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 148 if (!ConstantSize) 149 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 150 RTLIB::MEMCPY); 151 uint64_t SizeVal = ConstantSize->getZExtValue(); 152 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 153 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 154 RTLIB::MEMCPY); 155 156 unsigned BytesLeft = SizeVal & 3; 157 unsigned NumMemOps = SizeVal >> 2; 158 unsigned EmittedNumMemOps = 0; 159 EVT VT = MVT::i32; 160 unsigned VTSize = 4; 161 unsigned i = 0; 162 // Emit a maximum of 4 loads in Thumb1 since we have fewer registers 163 const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; 164 SDValue TFOps[6]; 165 SDValue Loads[6]; 166 uint64_t SrcOff = 0, DstOff = 0; 167 168 // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to 169 // VLDM/VSTM and make this code emit it when appropriate. This would reduce 170 // pressure on the general purpose registers. However this seems harder to map 171 // onto the register allocator's view of the world. 172 173 // The number of MEMCPY pseudo-instructions to emit. We use up to 174 // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm 175 // later on. This is a lower bound on the number of MEMCPY operations we must 176 // emit. 177 unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; 178 179 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); 180 181 for (unsigned I = 0; I != NumMEMCPYs; ++I) { 182 // Evenly distribute registers among MEMCPY operations to reduce register 183 // pressure. 184 unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs; 185 unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps; 186 187 Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src, 188 DAG.getConstant(NumRegs, dl, MVT::i32)); 189 Src = Dst.getValue(1); 190 Chain = Dst.getValue(2); 191 192 DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize); 193 SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize); 194 195 EmittedNumMemOps = NextEmittedNumMemOps; 196 } 197 198 if (BytesLeft == 0) 199 return Chain; 200 201 // Issue loads / stores for the trailing (1 - 3) bytes. 202 unsigned BytesLeftSave = BytesLeft; 203 i = 0; 204 while (BytesLeft) { 205 if (BytesLeft >= 2) { 206 VT = MVT::i16; 207 VTSize = 2; 208 } else { 209 VT = MVT::i8; 210 VTSize = 1; 211 } 212 213 Loads[i] = DAG.getLoad(VT, dl, Chain, 214 DAG.getNode(ISD::ADD, dl, MVT::i32, Src, 215 DAG.getConstant(SrcOff, dl, MVT::i32)), 216 SrcPtrInfo.getWithOffset(SrcOff), 217 false, false, false, 0); 218 TFOps[i] = Loads[i].getValue(1); 219 ++i; 220 SrcOff += VTSize; 221 BytesLeft -= VTSize; 222 } 223 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 224 makeArrayRef(TFOps, i)); 225 226 i = 0; 227 BytesLeft = BytesLeftSave; 228 while (BytesLeft) { 229 if (BytesLeft >= 2) { 230 VT = MVT::i16; 231 VTSize = 2; 232 } else { 233 VT = MVT::i8; 234 VTSize = 1; 235 } 236 237 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 238 DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, 239 DAG.getConstant(DstOff, dl, MVT::i32)), 240 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 241 ++i; 242 DstOff += VTSize; 243 BytesLeft -= VTSize; 244 } 245 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 246 makeArrayRef(TFOps, i)); 247 } 248 249 250 SDValue ARMSelectionDAGInfo:: 251 EmitTargetCodeForMemmove(SelectionDAG &DAG, SDLoc dl, 252 SDValue Chain, 253 SDValue Dst, SDValue Src, 254 SDValue Size, unsigned Align, 255 bool isVolatile, 256 MachinePointerInfo DstPtrInfo, 257 MachinePointerInfo SrcPtrInfo) const { 258 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 259 RTLIB::MEMMOVE); 260 } 261 262 263 SDValue ARMSelectionDAGInfo:: 264 EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 265 SDValue Chain, SDValue Dst, 266 SDValue Src, SDValue Size, 267 unsigned Align, bool isVolatile, 268 MachinePointerInfo DstPtrInfo) const { 269 return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align, 270 RTLIB::MEMSET); 271 } 272