1 //===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the X86SelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "X86InstrInfo.h" 15 #include "X86ISelLowering.h" 16 #include "X86RegisterInfo.h" 17 #include "X86Subtarget.h" 18 #include "X86SelectionDAGInfo.h" 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/IR/DerivedTypes.h" 21 #include "llvm/Target/TargetLowering.h" 22 23 using namespace llvm; 24 25 #define DEBUG_TYPE "x86-selectiondag-info" 26 27 X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL) 28 : TargetSelectionDAGInfo(&DL) {} 29 30 X86SelectionDAGInfo::~X86SelectionDAGInfo() {} 31 32 bool X86SelectionDAGInfo::isBaseRegConflictPossible( 33 SelectionDAG &DAG, ArrayRef<unsigned> ClobberSet) const { 34 // We cannot use TRI->hasBasePointer() until *after* we select all basic 35 // blocks. Legalization may introduce new stack temporaries with large 36 // alignment requirements. Fall back to generic code if there are any 37 // dynamic stack adjustments (hopefully rare) and the base pointer would 38 // conflict if we had to use it. 39 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 40 if (!MFI->hasVarSizedObjects() && !MFI->hasInlineAsmWithSPAdjust()) 41 return false; 42 43 const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>( 44 DAG.getSubtarget().getRegisterInfo()); 45 unsigned BaseReg = TRI->getBaseRegister(); 46 for (unsigned R : ClobberSet) 47 if (BaseReg == R) 48 return true; 49 return false; 50 } 51 52 SDValue 53 X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 54 SDValue Chain, 55 SDValue Dst, SDValue Src, 56 SDValue Size, unsigned Align, 57 bool isVolatile, 58 MachinePointerInfo DstPtrInfo) const { 59 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 60 const X86Subtarget &Subtarget = 61 DAG.getMachineFunction().getSubtarget<X86Subtarget>(); 62 63 #ifndef NDEBUG 64 // If the base register might conflict with our physical registers, bail out. 65 const unsigned ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI, 66 X86::ECX, X86::EAX, X86::EDI}; 67 assert(!isBaseRegConflictPossible(DAG, ClobberSet)); 68 #endif 69 70 // If to a segment-relative address space, use the default lowering. 71 if (DstPtrInfo.getAddrSpace() >= 256) 72 return SDValue(); 73 74 // If not DWORD aligned or size is more than the threshold, call the library. 75 // The libc version is likely to be faster for these cases. It can use the 76 // address value and run time information about the CPU. 77 if ((Align & 3) != 0 || !ConstantSize || 78 ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { 79 // Check to see if there is a specialized entry-point for memory zeroing. 80 ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); 81 82 if (const char *bzeroEntry = V && 83 V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { 84 EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy(); 85 Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); 86 TargetLowering::ArgListTy Args; 87 TargetLowering::ArgListEntry Entry; 88 Entry.Node = Dst; 89 Entry.Ty = IntPtrTy; 90 Args.push_back(Entry); 91 Entry.Node = Size; 92 Args.push_back(Entry); 93 94 TargetLowering::CallLoweringInfo CLI(DAG); 95 CLI.setDebugLoc(dl).setChain(Chain) 96 .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), 97 DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 98 0) 99 .setDiscardResult(); 100 101 std::pair<SDValue,SDValue> CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); 102 return CallResult.second; 103 } 104 105 // Otherwise have the target-independent code call memset. 106 return SDValue(); 107 } 108 109 uint64_t SizeVal = ConstantSize->getZExtValue(); 110 SDValue InFlag; 111 EVT AVT; 112 SDValue Count; 113 ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src); 114 unsigned BytesLeft = 0; 115 bool TwoRepStos = false; 116 if (ValC) { 117 unsigned ValReg; 118 uint64_t Val = ValC->getZExtValue() & 255; 119 120 // If the value is a constant, then we can potentially use larger sets. 121 switch (Align & 3) { 122 case 2: // WORD aligned 123 AVT = MVT::i16; 124 ValReg = X86::AX; 125 Val = (Val << 8) | Val; 126 break; 127 case 0: // DWORD aligned 128 AVT = MVT::i32; 129 ValReg = X86::EAX; 130 Val = (Val << 8) | Val; 131 Val = (Val << 16) | Val; 132 if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned 133 AVT = MVT::i64; 134 ValReg = X86::RAX; 135 Val = (Val << 32) | Val; 136 } 137 break; 138 default: // Byte aligned 139 AVT = MVT::i8; 140 ValReg = X86::AL; 141 Count = DAG.getIntPtrConstant(SizeVal); 142 break; 143 } 144 145 if (AVT.bitsGT(MVT::i8)) { 146 unsigned UBytes = AVT.getSizeInBits() / 8; 147 Count = DAG.getIntPtrConstant(SizeVal / UBytes); 148 BytesLeft = SizeVal % UBytes; 149 } 150 151 Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT), 152 InFlag); 153 InFlag = Chain.getValue(1); 154 } else { 155 AVT = MVT::i8; 156 Count = DAG.getIntPtrConstant(SizeVal); 157 Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag); 158 InFlag = Chain.getValue(1); 159 } 160 161 Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, 162 Count, InFlag); 163 InFlag = Chain.getValue(1); 164 Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, 165 Dst, InFlag); 166 InFlag = Chain.getValue(1); 167 168 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 169 SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 170 Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); 171 172 if (TwoRepStos) { 173 InFlag = Chain.getValue(1); 174 Count = Size; 175 EVT CVT = Count.getValueType(); 176 SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, 177 DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT)); 178 Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : 179 X86::ECX, 180 Left, InFlag); 181 InFlag = Chain.getValue(1); 182 Tys = DAG.getVTList(MVT::Other, MVT::Glue); 183 SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; 184 Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); 185 } else if (BytesLeft) { 186 // Handle the last 1 - 7 bytes. 187 unsigned Offset = SizeVal - BytesLeft; 188 EVT AddrVT = Dst.getValueType(); 189 EVT SizeVT = Size.getValueType(); 190 191 Chain = DAG.getMemset(Chain, dl, 192 DAG.getNode(ISD::ADD, dl, AddrVT, Dst, 193 DAG.getConstant(Offset, AddrVT)), 194 Src, 195 DAG.getConstant(BytesLeft, SizeVT), 196 Align, isVolatile, false, 197 DstPtrInfo.getWithOffset(Offset)); 198 } 199 200 // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. 201 return Chain; 202 } 203 204 SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( 205 SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, 206 SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, 207 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { 208 // This requires the copy size to be a constant, preferably 209 // within a subtarget-specific limit. 210 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 211 const X86Subtarget &Subtarget = 212 DAG.getMachineFunction().getSubtarget<X86Subtarget>(); 213 if (!ConstantSize) 214 return SDValue(); 215 uint64_t SizeVal = ConstantSize->getZExtValue(); 216 if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) 217 return SDValue(); 218 219 /// If not DWORD aligned, it is more efficient to call the library. However 220 /// if calling the library is not allowed (AlwaysInline), then soldier on as 221 /// the code generated here is better than the long load-store sequence we 222 /// would otherwise get. 223 if (!AlwaysInline && (Align & 3) != 0) 224 return SDValue(); 225 226 // If to a segment-relative address space, use the default lowering. 227 if (DstPtrInfo.getAddrSpace() >= 256 || 228 SrcPtrInfo.getAddrSpace() >= 256) 229 return SDValue(); 230 231 // If the base register might conflict with our physical registers, bail out. 232 const unsigned ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI, 233 X86::ECX, X86::ESI, X86::EDI}; 234 if (isBaseRegConflictPossible(DAG, ClobberSet)) 235 return SDValue(); 236 237 MVT AVT; 238 if (Align & 1) 239 AVT = MVT::i8; 240 else if (Align & 2) 241 AVT = MVT::i16; 242 else if (Align & 4) 243 // DWORD aligned 244 AVT = MVT::i32; 245 else 246 // QWORD aligned 247 AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; 248 249 unsigned UBytes = AVT.getSizeInBits() / 8; 250 unsigned CountVal = SizeVal / UBytes; 251 SDValue Count = DAG.getIntPtrConstant(CountVal); 252 unsigned BytesLeft = SizeVal % UBytes; 253 254 SDValue InFlag; 255 Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : 256 X86::ECX, 257 Count, InFlag); 258 InFlag = Chain.getValue(1); 259 Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : 260 X86::EDI, 261 Dst, InFlag); 262 InFlag = Chain.getValue(1); 263 Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : 264 X86::ESI, 265 Src, InFlag); 266 InFlag = Chain.getValue(1); 267 268 SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); 269 SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; 270 SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops); 271 272 SmallVector<SDValue, 4> Results; 273 Results.push_back(RepMovs); 274 if (BytesLeft) { 275 // Handle the last 1 - 7 bytes. 276 unsigned Offset = SizeVal - BytesLeft; 277 EVT DstVT = Dst.getValueType(); 278 EVT SrcVT = Src.getValueType(); 279 EVT SizeVT = Size.getValueType(); 280 Results.push_back(DAG.getMemcpy(Chain, dl, 281 DAG.getNode(ISD::ADD, dl, DstVT, Dst, 282 DAG.getConstant(Offset, DstVT)), 283 DAG.getNode(ISD::ADD, dl, SrcVT, Src, 284 DAG.getConstant(Offset, SrcVT)), 285 DAG.getConstant(BytesLeft, SizeVT), 286 Align, isVolatile, AlwaysInline, false, 287 DstPtrInfo.getWithOffset(Offset), 288 SrcPtrInfo.getWithOffset(Offset))); 289 } 290 291 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results); 292 } 293