1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the PTXSelectionDAGInfo class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #define DEBUG_TYPE "ptx-selectiondag-info" 15 #include "PTXTargetMachine.h" 16 #include "llvm/DerivedTypes.h" 17 #include "llvm/CodeGen/SelectionDAG.h" 18 using namespace llvm; 19 20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) 21 : TargetSelectionDAGInfo(TM), 22 Subtarget(&TM.getSubtarget<PTXSubtarget>()) { 23 } 24 25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { 26 } 27 28 SDValue 29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, 30 SDValue Chain, 31 SDValue Dst, SDValue Src, 32 SDValue Size, unsigned Align, 33 bool isVolatile, bool AlwaysInline, 34 MachinePointerInfo DstPtrInfo, 35 MachinePointerInfo SrcPtrInfo) const { 36 // Do repeated 4-byte loads and stores. To be improved. 37 // This requires 4-byte alignment. 38 if ((Align & 3) != 0) 39 return SDValue(); 40 // This requires the copy size to be a constant, preferably 41 // within a subtarget-specific limit. 42 ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); 43 if (!ConstantSize) 44 return SDValue(); 45 uint64_t SizeVal = ConstantSize->getZExtValue(); 46 // Always inline memcpys. In PTX, we do not have a C library that provides 47 // a memcpy function. 48 //if (!AlwaysInline) 49 // return SDValue(); 50 51 unsigned BytesLeft = SizeVal & 3; 52 unsigned NumMemOps = SizeVal >> 2; 53 unsigned EmittedNumMemOps = 0; 54 EVT VT = MVT::i32; 55 unsigned VTSize = 4; 56 unsigned i = 0; 57 const unsigned MAX_LOADS_IN_LDM = 6; 58 SDValue TFOps[MAX_LOADS_IN_LDM]; 59 SDValue Loads[MAX_LOADS_IN_LDM]; 60 uint64_t SrcOff = 0, DstOff = 0; 61 EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; 62 63 // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the 64 // same number of stores. The loads and stores will get combined into 65 // ldm/stm later on. 66 while (EmittedNumMemOps < NumMemOps) { 67 for (i = 0; 68 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 69 Loads[i] = DAG.getLoad(VT, dl, Chain, 70 DAG.getNode(ISD::ADD, dl, PointerType, Src, 71 DAG.getConstant(SrcOff, PointerType)), 72 SrcPtrInfo.getWithOffset(SrcOff), isVolatile, 73 false, false, 0); 74 TFOps[i] = Loads[i].getValue(1); 75 SrcOff += VTSize; 76 } 77 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 78 79 for (i = 0; 80 i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { 81 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 82 DAG.getNode(ISD::ADD, dl, PointerType, Dst, 83 DAG.getConstant(DstOff, PointerType)), 84 DstPtrInfo.getWithOffset(DstOff), 85 isVolatile, false, 0); 86 DstOff += VTSize; 87 } 88 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 89 90 EmittedNumMemOps += i; 91 } 92 93 if (BytesLeft == 0) 94 return Chain; 95 96 // Issue loads / stores for the trailing (1 - 3) bytes. 97 unsigned BytesLeftSave = BytesLeft; 98 i = 0; 99 while (BytesLeft) { 100 if (BytesLeft >= 2) { 101 VT = MVT::i16; 102 VTSize = 2; 103 } else { 104 VT = MVT::i8; 105 VTSize = 1; 106 } 107 108 Loads[i] = DAG.getLoad(VT, dl, Chain, 109 DAG.getNode(ISD::ADD, dl, PointerType, Src, 110 DAG.getConstant(SrcOff, PointerType)), 111 SrcPtrInfo.getWithOffset(SrcOff), false, false, 112 false, 0); 113 TFOps[i] = Loads[i].getValue(1); 114 ++i; 115 SrcOff += VTSize; 116 BytesLeft -= VTSize; 117 } 118 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 119 120 i = 0; 121 BytesLeft = BytesLeftSave; 122 while (BytesLeft) { 123 if (BytesLeft >= 2) { 124 VT = MVT::i16; 125 VTSize = 2; 126 } else { 127 VT = MVT::i8; 128 VTSize = 1; 129 } 130 131 TFOps[i] = DAG.getStore(Chain, dl, Loads[i], 132 DAG.getNode(ISD::ADD, dl, PointerType, Dst, 133 DAG.getConstant(DstOff, PointerType)), 134 DstPtrInfo.getWithOffset(DstOff), false, false, 0); 135 ++i; 136 DstOff += VTSize; 137 BytesLeft -= VTSize; 138 } 139 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); 140 } 141 142 SDValue PTXSelectionDAGInfo:: 143 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, 144 SDValue Chain, SDValue Dst, 145 SDValue Src, SDValue Size, 146 unsigned Align, bool isVolatile, 147 MachinePointerInfo DstPtrInfo) const { 148 llvm_unreachable("memset lowering not implemented for PTX yet"); 149 } 150 151