1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PTXSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "ptx-selectiondag-info" 15 #include "PTXTargetMachine.h" 16 #include "llvm/DerivedTypes.h" 17 #include "llvm/CodeGen/SelectionDAG.h" 18 using namespace llvm; 19 20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) 21 : TargetSelectionDAGInfo(TM), 22 Subtarget(&TM.getSubtarget<PTXSubtarget>()) { 23 } 24 25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { 26 } 27 28 SDValue 29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, 30 SDValue Chain, 31 SDValue Dst, SDValue Src, 32 SDValue Size, unsigned Align, 33 bool isVolatile, bool AlwaysInline, 34 MachinePointerInfo DstPtrInfo, 35 MachinePointerInfo SrcPtrInfo) const { 36 // Do repeated 4-byte loads and stores. To be improved. 37 // This requires 4-byte alignment. 38 if ((Align & 3) != 0) 39 return SDValue(); 40 // This requires the copy size to be a constant, preferably 41 // within a subtarget-specific limit. 42 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 43 if (!ConstantSize) 44 return SDValue(); 45 uint64_t SizeVal = ConstantSize->getZExtValue(); 46 // Always inline memcpys. In PTX, we do not have a C library that provides 47 // a memcpy function. 48 //if (!AlwaysInline) 49 // return SDValue(); 50 51 unsigned BytesLeft = SizeVal & 3; 52 unsigned NumMemOps = SizeVal >> 2; 53 unsigned EmittedNumMemOps = 0; 54 EVT VT = MVT::i32; 55 unsigned VTSize = 4; 56 unsigned i = 0; 57 const unsigned MAX_LOADS_IN_LDM = 6; 58 SDValue TFOps[MAX_LOADS_IN_LDM]; 59 SDValue Loads[MAX_LOADS_IN_LDM]; 60 uint64_t SrcOff = 0, DstOff = 0; 61 EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 62 63 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 64 // same number of stores. The loads and stores will get combined into 65 // ldm/stm later on. 66 while (EmittedNumMemOps < NumMemOps) { 67 for (i = 0; 68 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 69 Loads[i] = DAG.getLoad(VT, dl, Chain, 70 DAG.getNode(ISD::ADD, dl, PointerType, Src, 71 DAG.getConstant(SrcOff, PointerType)), 72 SrcPtrInfo.getWithOffset(SrcOff), isVolatile, 73 false, 0); 74 TFOps[i] = Loads[i].getValue(1); 75 SrcOff += VTSize; 76 } 77 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 78 79 for (i = 0; 80 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 81 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 82 DAG.getNode(ISD::ADD, dl, PointerType, Dst, 83 DAG.getConstant(DstOff, PointerType)), 84 DstPtrInfo.getWithOffset(DstOff), 85 isVolatile, false, 0); 86 DstOff += VTSize; 87 } 88 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 89 90 EmittedNumMemOps += i; 91 } 92 93 if (BytesLeft == 0) 94 return Chain; 95 96 // Issue loads / stores for the trailing (1 - 3) bytes. 97 unsigned BytesLeftSave = BytesLeft; 98 i = 0; 99 while (BytesLeft) { 100 if (BytesLeft >= 2) { 101 VT = MVT::i16; 102 VTSize = 2; 103 } else { 104 VT = MVT::i8; 105 VTSize = 1; 106 } 107 108 Loads[i] = DAG.getLoad(VT, dl, Chain, 109 DAG.getNode(ISD::ADD, dl, PointerType, Src, 110 DAG.getConstant(SrcOff, PointerType)), 111 SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); 112 TFOps[i] = Loads[i].getValue(1); 113 ++i; 114 SrcOff += VTSize; 115 BytesLeft -= VTSize; 116 } 117 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 118 119 i = 0; 120 BytesLeft = BytesLeftSave; 121 while (BytesLeft) { 122 if (BytesLeft >= 2) { 123 VT = MVT::i16; 124 VTSize = 2; 125 } else { 126 VT = MVT::i8; 127 VTSize = 1; 128 } 129 130 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 131 DAG.getNode(ISD::ADD, dl, PointerType, Dst, 132 DAG.getConstant(DstOff, PointerType)), 133 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 134 ++i; 135 DstOff += VTSize; 136 BytesLeft -= VTSize; 137 } 138 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 139 } 140 141 SDValue PTXSelectionDAGInfo:: 142 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, 143 SDValue Chain, SDValue Dst, 144 SDValue Src, SDValue Size, 145 unsigned Align, bool isVolatile, 146 MachinePointerInfo DstPtrInfo) const { 147 llvm_unreachable("memset lowering not implemented for PTX yet"); 148 } 149 150