Home | History | Annotate | Download | only in Renderscript
      1 /*
      2  * Copyright 2012, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "bcc/Assert.h"
     18 #include "bcc/Renderscript/RSTransforms.h"
     19 
     20 #include <cstdlib>
     21 
     22 #include <llvm/IR/DerivedTypes.h>
     23 #include <llvm/IR/Function.h>
     24 #include <llvm/IR/Instructions.h>
     25 #include <llvm/IR/IRBuilder.h>
     26 #include <llvm/IR/Module.h>
     27 #include <llvm/Pass.h>
     28 #include <llvm/Support/raw_ostream.h>
     29 #include <llvm/IR/DataLayout.h>
     30 #include <llvm/IR/Type.h>
     31 
     32 #include "bcc/Config/Config.h"
     33 #include "bcc/Renderscript/RSInfo.h"
     34 #include "bcc/Support/Log.h"
     35 
     36 using namespace bcc;
     37 
     38 namespace {
     39 
     40 /* RSForEachExpandPass - This pass operates on functions that are able to be
     41  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
     42  * ForEach-able function to be invoked over the appropriate data cells of the
     43  * input/output allocations (adjusting other relevant parameters as we go). We
     44  * support doing this for any ForEach-able compute kernels. The new function
     45  * name is the original function name followed by ".expand". Note that we
     46  * still generate code for the original function.
     47  */
     48 class RSForEachExpandPass : public llvm::ModulePass {
     49 private:
     50   static char ID;
     51 
     52   llvm::Module *M;
     53   llvm::LLVMContext *C;
     54 
     55   const RSInfo::ExportForeachFuncListTy &mFuncs;
     56 
     57   // Turns on optimization of allocation stride values.
     58   bool mEnableStepOpt;
     59 
     60   uint32_t getRootSignature(llvm::Function *F) {
     61     const llvm::NamedMDNode *ExportForEachMetadata =
     62         M->getNamedMetadata("#rs_export_foreach");
     63 
     64     if (!ExportForEachMetadata) {
     65       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
     66       for (llvm::Function::arg_iterator B = F->arg_begin(),
     67                                         E = F->arg_end();
     68            B != E;
     69            ++B) {
     70         RootArgTys.push_back(B->getType());
     71       }
     72 
     73       // For pre-ICS bitcode, we may not have signature information. In that
     74       // case, we use the size of the RootArgTys to select the number of
     75       // arguments.
     76       return (1 << RootArgTys.size()) - 1;
     77     }
     78 
     79     if (ExportForEachMetadata->getNumOperands() == 0) {
     80       return 0;
     81     }
     82 
     83     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
     84 
     85     // We only handle the case for legacy root() functions here, so this is
     86     // hard-coded to look at only the first such function.
     87     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
     88     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
     89       llvm::Value *SigVal = SigNode->getOperand(0);
     90       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
     91         llvm::StringRef SigString =
     92             static_cast<llvm::MDString*>(SigVal)->getString();
     93         uint32_t Signature = 0;
     94         if (SigString.getAsInteger(10, Signature)) {
     95           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
     96           return 0;
     97         }
     98         return Signature;
     99       }
    100     }
    101 
    102     return 0;
    103   }
    104 
    105   // Get the actual value we should use to step through an allocation.
    106   // DL - Target Data size/layout information.
    107   // T - Type of allocation (should be a pointer).
    108   // OrigStep - Original step increment (root.expand() input from driver).
    109   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
    110                             llvm::Value *OrigStep) {
    111     bccAssert(DL);
    112     bccAssert(T);
    113     bccAssert(OrigStep);
    114     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
    115     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
    116     if (mEnableStepOpt && T != VoidPtrTy && PT) {
    117       llvm::Type *ET = PT->getElementType();
    118       uint64_t ETSize = DL->getTypeAllocSize(ET);
    119       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    120       return llvm::ConstantInt::get(Int32Ty, ETSize);
    121     } else {
    122       return OrigStep;
    123     }
    124   }
    125 
    126   static bool hasIn(uint32_t Signature) {
    127     return Signature & 0x01;
    128   }
    129 
    130   static bool hasOut(uint32_t Signature) {
    131     return Signature & 0x02;
    132   }
    133 
    134   static bool hasUsrData(uint32_t Signature) {
    135     return Signature & 0x04;
    136   }
    137 
    138   static bool hasX(uint32_t Signature) {
    139     return Signature & 0x08;
    140   }
    141 
    142   static bool hasY(uint32_t Signature) {
    143     return Signature & 0x10;
    144   }
    145 
    146   static bool isKernel(uint32_t Signature) {
    147     return Signature & 0x20;
    148   }
    149 
    150 
    151 public:
    152   RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
    153                       bool pEnableStepOpt)
    154       : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
    155         mEnableStepOpt(pEnableStepOpt) {
    156   }
    157 
    158   /* Performs the actual optimization on a selected function. On success, the
    159    * Module will contain a new function of the name "<NAME>.expand" that
    160    * invokes <NAME>() in a loop with the appropriate parameters.
    161    */
    162   bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
    163     ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
    164 
    165     if (!Signature) {
    166       Signature = getRootSignature(F);
    167       if (!Signature) {
    168         // We couldn't determine how to expand this function based on its
    169         // function signature.
    170         return false;
    171       }
    172     }
    173 
    174     llvm::DataLayout DL(M);
    175 
    176     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
    177     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    178     llvm::Type *SizeTy = Int32Ty;
    179 
    180     /* Defined in frameworks/base/libs/rs/rs_hal.h:
    181      *
    182      * struct RsForEachStubParamStruct {
    183      *   const void *in;
    184      *   void *out;
    185      *   const void *usr;
    186      *   size_t usr_len;
    187      *   uint32_t x;
    188      *   uint32_t y;
    189      *   uint32_t z;
    190      *   uint32_t lod;
    191      *   enum RsAllocationCubemapFace face;
    192      *   uint32_t ar[16];
    193      * };
    194      */
    195     llvm::SmallVector<llvm::Type*, 9> StructTys;
    196     StructTys.push_back(VoidPtrTy);  // const void *in
    197     StructTys.push_back(VoidPtrTy);  // void *out
    198     StructTys.push_back(VoidPtrTy);  // const void *usr
    199     StructTys.push_back(SizeTy);     // size_t usr_len
    200     StructTys.push_back(Int32Ty);    // uint32_t x
    201     StructTys.push_back(Int32Ty);    // uint32_t y
    202     StructTys.push_back(Int32Ty);    // uint32_t z
    203     StructTys.push_back(Int32Ty);    // uint32_t lod
    204     StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
    205     StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
    206 
    207     llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
    208         StructTys, "RsForEachStubParamStruct")->getPointerTo();
    209 
    210     /* Create the function signature for our expanded function.
    211      * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
    212      *       uint32_t instep, uint32_t outstep)
    213      */
    214     llvm::SmallVector<llvm::Type*, 8> ParamTys;
    215     ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
    216     ParamTys.push_back(Int32Ty);           // uint32_t x1
    217     ParamTys.push_back(Int32Ty);           // uint32_t x2
    218     ParamTys.push_back(Int32Ty);           // uint32_t instep
    219     ParamTys.push_back(Int32Ty);           // uint32_t outstep
    220 
    221     llvm::FunctionType *FT =
    222         llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
    223     llvm::Function *ExpandedFunc =
    224         llvm::Function::Create(FT,
    225                                llvm::GlobalValue::ExternalLinkage,
    226                                F->getName() + ".expand", M);
    227 
    228     // Create and name the actual arguments to this expanded function.
    229     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
    230     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
    231                                       E = ExpandedFunc->arg_end();
    232          B != E;
    233          ++B) {
    234       ArgVec.push_back(B);
    235     }
    236 
    237     if (ArgVec.size() != 5) {
    238       ALOGE("Incorrect number of arguments to function: %zu",
    239             ArgVec.size());
    240       return false;
    241     }
    242     llvm::Value *Arg_p = ArgVec[0];
    243     llvm::Value *Arg_x1 = ArgVec[1];
    244     llvm::Value *Arg_x2 = ArgVec[2];
    245     llvm::Value *Arg_instep = ArgVec[3];
    246     llvm::Value *Arg_outstep = ArgVec[4];
    247 
    248     Arg_p->setName("p");
    249     Arg_x1->setName("x1");
    250     Arg_x2->setName("x2");
    251     Arg_instep->setName("arg_instep");
    252     Arg_outstep->setName("arg_outstep");
    253 
    254     llvm::Value *InStep = NULL;
    255     llvm::Value *OutStep = NULL;
    256 
    257     // Construct the actual function body.
    258     llvm::BasicBlock *Begin =
    259         llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
    260     llvm::IRBuilder<> Builder(Begin);
    261 
    262     // uint32_t X = x1;
    263     llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
    264     Builder.CreateStore(Arg_x1, AX);
    265 
    266     // Collect and construct the arguments for the kernel().
    267     // Note that we load any loop-invariant arguments before entering the Loop.
    268     llvm::Function::arg_iterator Args = F->arg_begin();
    269 
    270     llvm::Type *InTy = NULL;
    271     llvm::AllocaInst *AIn = NULL;
    272     if (hasIn(Signature)) {
    273       InTy = Args->getType();
    274       AIn = Builder.CreateAlloca(InTy, 0, "AIn");
    275       InStep = getStepValue(&DL, InTy, Arg_instep);
    276       InStep->setName("instep");
    277       Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
    278           Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
    279       Args++;
    280     }
    281 
    282     llvm::Type *OutTy = NULL;
    283     llvm::AllocaInst *AOut = NULL;
    284     if (hasOut(Signature)) {
    285       OutTy = Args->getType();
    286       AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
    287       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    288       OutStep->setName("outstep");
    289       Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
    290           Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
    291       Args++;
    292     }
    293 
    294     llvm::Value *UsrData = NULL;
    295     if (hasUsrData(Signature)) {
    296       llvm::Type *UsrDataTy = Args->getType();
    297       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
    298           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
    299       UsrData->setName("UsrData");
    300       Args++;
    301     }
    302 
    303     if (hasX(Signature)) {
    304       Args++;
    305     }
    306 
    307     llvm::Value *Y = NULL;
    308     if (hasY(Signature)) {
    309       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    310       Args++;
    311     }
    312 
    313     bccAssert(Args == F->arg_end());
    314 
    315     llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
    316     llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
    317 
    318     // if (x1 < x2) goto Loop; else goto Exit;
    319     llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
    320     Builder.CreateCondBr(Cond, Loop, Exit);
    321 
    322     // Loop:
    323     Builder.SetInsertPoint(Loop);
    324 
    325     // Populate the actual call to kernel().
    326     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    327 
    328     llvm::Value *InPtr = NULL;
    329     llvm::Value *OutPtr = NULL;
    330 
    331     if (AIn) {
    332       InPtr = Builder.CreateLoad(AIn, "InPtr");
    333       RootArgs.push_back(InPtr);
    334     }
    335 
    336     if (AOut) {
    337       OutPtr = Builder.CreateLoad(AOut, "OutPtr");
    338       RootArgs.push_back(OutPtr);
    339     }
    340 
    341     if (UsrData) {
    342       RootArgs.push_back(UsrData);
    343     }
    344 
    345     // We always have to load X, since it is used to iterate through the loop.
    346     llvm::Value *X = Builder.CreateLoad(AX, "X");
    347     if (hasX(Signature)) {
    348       RootArgs.push_back(X);
    349     }
    350 
    351     if (Y) {
    352       RootArgs.push_back(Y);
    353     }
    354 
    355     Builder.CreateCall(F, RootArgs);
    356 
    357     if (InPtr) {
    358       // InPtr += instep
    359       llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
    360           Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
    361       Builder.CreateStore(NewIn, AIn);
    362     }
    363 
    364     if (OutPtr) {
    365       // OutPtr += outstep
    366       llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
    367           Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
    368       Builder.CreateStore(NewOut, AOut);
    369     }
    370 
    371     // X++;
    372     llvm::Value *XPlusOne =
    373         Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
    374     Builder.CreateStore(XPlusOne, AX);
    375 
    376     // If (X < x2) goto Loop; else goto Exit;
    377     Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
    378     Builder.CreateCondBr(Cond, Loop, Exit);
    379 
    380     // Exit:
    381     Builder.SetInsertPoint(Exit);
    382     Builder.CreateRetVoid();
    383 
    384     return true;
    385   }
    386 
    387   /* Expand a pass-by-value kernel.
    388    */
    389   bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
    390     bccAssert(isKernel(Signature));
    391     ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
    392 
    393     // TODO: Refactor this to share functionality with ExpandFunction.
    394     llvm::DataLayout DL(M);
    395 
    396     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
    397     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    398     llvm::Type *SizeTy = Int32Ty;
    399 
    400     /* Defined in frameworks/base/libs/rs/rs_hal.h:
    401      *
    402      * struct RsForEachStubParamStruct {
    403      *   const void *in;
    404      *   void *out;
    405      *   const void *usr;
    406      *   size_t usr_len;
    407      *   uint32_t x;
    408      *   uint32_t y;
    409      *   uint32_t z;
    410      *   uint32_t lod;
    411      *   enum RsAllocationCubemapFace face;
    412      *   uint32_t ar[16];
    413      * };
    414      */
    415     llvm::SmallVector<llvm::Type*, 9> StructTys;
    416     StructTys.push_back(VoidPtrTy);  // const void *in
    417     StructTys.push_back(VoidPtrTy);  // void *out
    418     StructTys.push_back(VoidPtrTy);  // const void *usr
    419     StructTys.push_back(SizeTy);     // size_t usr_len
    420     StructTys.push_back(Int32Ty);    // uint32_t x
    421     StructTys.push_back(Int32Ty);    // uint32_t y
    422     StructTys.push_back(Int32Ty);    // uint32_t z
    423     StructTys.push_back(Int32Ty);    // uint32_t lod
    424     StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
    425     StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
    426 
    427     llvm::Type *ForEachStubPtrTy = llvm::StructType::create(
    428         StructTys, "RsForEachStubParamStruct")->getPointerTo();
    429 
    430     /* Create the function signature for our expanded function.
    431      * void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
    432      *       uint32_t instep, uint32_t outstep)
    433      */
    434     llvm::SmallVector<llvm::Type*, 8> ParamTys;
    435     ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
    436     ParamTys.push_back(Int32Ty);           // uint32_t x1
    437     ParamTys.push_back(Int32Ty);           // uint32_t x2
    438     ParamTys.push_back(Int32Ty);           // uint32_t instep
    439     ParamTys.push_back(Int32Ty);           // uint32_t outstep
    440 
    441     llvm::FunctionType *FT =
    442         llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
    443     llvm::Function *ExpandedFunc =
    444         llvm::Function::Create(FT,
    445                                llvm::GlobalValue::ExternalLinkage,
    446                                F->getName() + ".expand", M);
    447 
    448     // Create and name the actual arguments to this expanded function.
    449     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
    450     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
    451                                       E = ExpandedFunc->arg_end();
    452          B != E;
    453          ++B) {
    454       ArgVec.push_back(B);
    455     }
    456 
    457     if (ArgVec.size() != 5) {
    458       ALOGE("Incorrect number of arguments to function: %zu",
    459             ArgVec.size());
    460       return false;
    461     }
    462     llvm::Value *Arg_p = ArgVec[0];
    463     llvm::Value *Arg_x1 = ArgVec[1];
    464     llvm::Value *Arg_x2 = ArgVec[2];
    465     llvm::Value *Arg_instep = ArgVec[3];
    466     llvm::Value *Arg_outstep = ArgVec[4];
    467 
    468     Arg_p->setName("p");
    469     Arg_x1->setName("x1");
    470     Arg_x2->setName("x2");
    471     Arg_instep->setName("arg_instep");
    472     Arg_outstep->setName("arg_outstep");
    473 
    474     llvm::Value *InStep = NULL;
    475     llvm::Value *OutStep = NULL;
    476 
    477     // Construct the actual function body.
    478     llvm::BasicBlock *Begin =
    479         llvm::BasicBlock::Create(*C, "Begin", ExpandedFunc);
    480     llvm::IRBuilder<> Builder(Begin);
    481 
    482     // uint32_t X = x1;
    483     llvm::AllocaInst *AX = Builder.CreateAlloca(Int32Ty, 0, "AX");
    484     Builder.CreateStore(Arg_x1, AX);
    485 
    486     // Collect and construct the arguments for the kernel().
    487     // Note that we load any loop-invariant arguments before entering the Loop.
    488     llvm::Function::arg_iterator Args = F->arg_begin();
    489 
    490     llvm::Type *OutTy = NULL;
    491     llvm::AllocaInst *AOut = NULL;
    492     bool PassOutByReference = false;
    493     if (hasOut(Signature)) {
    494       llvm::Type *OutBaseTy = F->getReturnType();
    495       if (OutBaseTy->isVoidTy()) {
    496         PassOutByReference = true;
    497         OutTy = Args->getType();
    498         Args++;
    499       } else {
    500         OutTy = OutBaseTy->getPointerTo();
    501         // We don't increment Args, since we are using the actual return type.
    502       }
    503       AOut = Builder.CreateAlloca(OutTy, 0, "AOut");
    504       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    505       OutStep->setName("outstep");
    506       Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
    507           Builder.CreateStructGEP(Arg_p, 1)), OutTy), AOut);
    508     }
    509 
    510     llvm::Type *InBaseTy = NULL;
    511     llvm::Type *InTy = NULL;
    512     llvm::AllocaInst *AIn = NULL;
    513     if (hasIn(Signature)) {
    514       InBaseTy = Args->getType();
    515       InTy =InBaseTy->getPointerTo();
    516       AIn = Builder.CreateAlloca(InTy, 0, "AIn");
    517       InStep = getStepValue(&DL, InTy, Arg_instep);
    518       InStep->setName("instep");
    519       Builder.CreateStore(Builder.CreatePointerCast(Builder.CreateLoad(
    520           Builder.CreateStructGEP(Arg_p, 0)), InTy), AIn);
    521       Args++;
    522     }
    523 
    524     // No usrData parameter on kernels.
    525     bccAssert(!hasUsrData(Signature));
    526 
    527     if (hasX(Signature)) {
    528       Args++;
    529     }
    530 
    531     llvm::Value *Y = NULL;
    532     if (hasY(Signature)) {
    533       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    534       Args++;
    535     }
    536 
    537     bccAssert(Args == F->arg_end());
    538 
    539     llvm::BasicBlock *Loop = llvm::BasicBlock::Create(*C, "Loop", ExpandedFunc);
    540     llvm::BasicBlock *Exit = llvm::BasicBlock::Create(*C, "Exit", ExpandedFunc);
    541 
    542     // if (x1 < x2) goto Loop; else goto Exit;
    543     llvm::Value *Cond = Builder.CreateICmpSLT(Arg_x1, Arg_x2);
    544     Builder.CreateCondBr(Cond, Loop, Exit);
    545 
    546     // Loop:
    547     Builder.SetInsertPoint(Loop);
    548 
    549     // Populate the actual call to kernel().
    550     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    551 
    552     llvm::Value *InPtr = NULL;
    553     llvm::Value *In = NULL;
    554     llvm::Value *OutPtr = NULL;
    555 
    556     if (PassOutByReference) {
    557       OutPtr = Builder.CreateLoad(AOut, "OutPtr");
    558       RootArgs.push_back(OutPtr);
    559     }
    560 
    561     if (AIn) {
    562       InPtr = Builder.CreateLoad(AIn, "InPtr");
    563       In = Builder.CreateLoad(InPtr, "In");
    564       RootArgs.push_back(In);
    565     }
    566 
    567     // We always have to load X, since it is used to iterate through the loop.
    568     llvm::Value *X = Builder.CreateLoad(AX, "X");
    569     if (hasX(Signature)) {
    570       RootArgs.push_back(X);
    571     }
    572 
    573     if (Y) {
    574       RootArgs.push_back(Y);
    575     }
    576 
    577     llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
    578 
    579     if (AOut && !PassOutByReference) {
    580       OutPtr = Builder.CreateLoad(AOut, "OutPtr");
    581       Builder.CreateStore(RetVal, OutPtr);
    582     }
    583 
    584     if (InPtr) {
    585       // InPtr += instep
    586       llvm::Value *NewIn = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
    587           Builder.CreatePtrToInt(InPtr, Int32Ty), InStep), InTy);
    588       Builder.CreateStore(NewIn, AIn);
    589     }
    590 
    591     if (OutPtr) {
    592       // OutPtr += outstep
    593       llvm::Value *NewOut = Builder.CreateIntToPtr(Builder.CreateNUWAdd(
    594           Builder.CreatePtrToInt(OutPtr, Int32Ty), OutStep), OutTy);
    595       Builder.CreateStore(NewOut, AOut);
    596     }
    597 
    598     // X++;
    599     llvm::Value *XPlusOne =
    600         Builder.CreateNUWAdd(X, llvm::ConstantInt::get(Int32Ty, 1));
    601     Builder.CreateStore(XPlusOne, AX);
    602 
    603     // If (X < x2) goto Loop; else goto Exit;
    604     Cond = Builder.CreateICmpSLT(XPlusOne, Arg_x2);
    605     Builder.CreateCondBr(Cond, Loop, Exit);
    606 
    607     // Exit:
    608     Builder.SetInsertPoint(Exit);
    609     Builder.CreateRetVoid();
    610 
    611     return true;
    612   }
    613 
    614   virtual bool runOnModule(llvm::Module &M) {
    615     bool Changed = false;
    616     this->M = &M;
    617     C = &M.getContext();
    618 
    619     for (RSInfo::ExportForeachFuncListTy::const_iterator
    620              func_iter = mFuncs.begin(), func_end = mFuncs.end();
    621          func_iter != func_end; func_iter++) {
    622       const char *name = func_iter->first;
    623       uint32_t signature = func_iter->second;
    624       llvm::Function *kernel = M.getFunction(name);
    625       if (kernel && isKernel(signature)) {
    626         Changed |= ExpandKernel(kernel, signature);
    627       }
    628       else if (kernel && kernel->getReturnType()->isVoidTy()) {
    629         Changed |= ExpandFunction(kernel, signature);
    630       }
    631     }
    632 
    633     return Changed;
    634   }
    635 
    636   virtual const char *getPassName() const {
    637     return "ForEach-able Function Expansion";
    638   }
    639 
    640 }; // end RSForEachExpandPass
    641 
    642 } // end anonymous namespace
    643 
    644 char RSForEachExpandPass::ID = 0;
    645 
    646 namespace bcc {
    647 
    648 llvm::ModulePass *
    649 createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
    650                           bool pEnableStepOpt){
    651   return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
    652 }
    653 
    654 } // end namespace bcc
    655