Home | History | Annotate | Download | only in PTX
      1 //===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the PTXSelectionDAGInfo class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #define DEBUG_TYPE "ptx-selectiondag-info"
     15 #include "PTXTargetMachine.h"
     16 #include "llvm/DerivedTypes.h"
     17 #include "llvm/CodeGen/SelectionDAG.h"
     18 using namespace llvm;
     19 
     20 PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
     21   : TargetSelectionDAGInfo(TM),
     22     Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
     23 }
     24 
     25 PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
     26 }
     27 
     28 SDValue
     29 PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
     30                                              SDValue Chain,
     31                                              SDValue Dst, SDValue Src,
     32                                              SDValue Size, unsigned Align,
     33                                              bool isVolatile, bool AlwaysInline,
     34                                              MachinePointerInfo DstPtrInfo,
     35                                           MachinePointerInfo SrcPtrInfo) const {
     36   // Do repeated 4-byte loads and stores. To be improved.
     37   // This requires 4-byte alignment.
     38   if ((Align & 3) != 0)
     39     return SDValue();
     40   // This requires the copy size to be a constant, preferably
     41   // within a subtarget-specific limit.
     42   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
     43   if (!ConstantSize)
     44     return SDValue();
     45   uint64_t SizeVal = ConstantSize->getZExtValue();
     46   // Always inline memcpys. In PTX, we do not have a C library that provides
     47   // a memcpy function.
     48   //if (!AlwaysInline)
     49   //  return SDValue();
     50 
     51   unsigned BytesLeft = SizeVal & 3;
     52   unsigned NumMemOps = SizeVal >> 2;
     53   unsigned EmittedNumMemOps = 0;
     54   EVT VT = MVT::i32;
     55   unsigned VTSize = 4;
     56   unsigned i = 0;
     57   const unsigned MAX_LOADS_IN_LDM = 6;
     58   SDValue TFOps[MAX_LOADS_IN_LDM];
     59   SDValue Loads[MAX_LOADS_IN_LDM];
     60   uint64_t SrcOff = 0, DstOff = 0;
     61   EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
     62 
     63   // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
     64   // same number of stores.  The loads and stores will get combined into
     65   // ldm/stm later on.
     66   while (EmittedNumMemOps < NumMemOps) {
     67     for (i = 0;
     68          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
     69       Loads[i] = DAG.getLoad(VT, dl, Chain,
     70                              DAG.getNode(ISD::ADD, dl, PointerType, Src,
     71                                          DAG.getConstant(SrcOff, PointerType)),
     72                              SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
     73                              false, false, 0);
     74       TFOps[i] = Loads[i].getValue(1);
     75       SrcOff += VTSize;
     76     }
     77     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
     78 
     79     for (i = 0;
     80          i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
     81       TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
     82                               DAG.getNode(ISD::ADD, dl, PointerType, Dst,
     83                                           DAG.getConstant(DstOff, PointerType)),
     84                               DstPtrInfo.getWithOffset(DstOff),
     85                               isVolatile, false, 0);
     86       DstOff += VTSize;
     87     }
     88     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
     89 
     90     EmittedNumMemOps += i;
     91   }
     92 
     93   if (BytesLeft == 0)
     94     return Chain;
     95 
     96   // Issue loads / stores for the trailing (1 - 3) bytes.
     97   unsigned BytesLeftSave = BytesLeft;
     98   i = 0;
     99   while (BytesLeft) {
    100     if (BytesLeft >= 2) {
    101       VT = MVT::i16;
    102       VTSize = 2;
    103     } else {
    104       VT = MVT::i8;
    105       VTSize = 1;
    106     }
    107 
    108     Loads[i] = DAG.getLoad(VT, dl, Chain,
    109                            DAG.getNode(ISD::ADD, dl, PointerType, Src,
    110                                        DAG.getConstant(SrcOff, PointerType)),
    111                            SrcPtrInfo.getWithOffset(SrcOff), false, false,
    112                            false, 0);
    113     TFOps[i] = Loads[i].getValue(1);
    114     ++i;
    115     SrcOff += VTSize;
    116     BytesLeft -= VTSize;
    117   }
    118   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
    119 
    120   i = 0;
    121   BytesLeft = BytesLeftSave;
    122   while (BytesLeft) {
    123     if (BytesLeft >= 2) {
    124       VT = MVT::i16;
    125       VTSize = 2;
    126     } else {
    127       VT = MVT::i8;
    128       VTSize = 1;
    129     }
    130 
    131     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
    132                             DAG.getNode(ISD::ADD, dl, PointerType, Dst,
    133                                         DAG.getConstant(DstOff, PointerType)),
    134                             DstPtrInfo.getWithOffset(DstOff), false, false, 0);
    135     ++i;
    136     DstOff += VTSize;
    137     BytesLeft -= VTSize;
    138   }
    139   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
    140 }
    141 
    142 SDValue PTXSelectionDAGInfo::
    143 EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
    144                         SDValue Chain, SDValue Dst,
    145                         SDValue Src, SDValue Size,
    146                         unsigned Align, bool isVolatile,
    147                         MachinePointerInfo DstPtrInfo) const {
    148   llvm_unreachable("memset lowering not implemented for PTX yet");
    149 }
    150 
    151