Home | History | Annotate | Download | only in Renderscript
      1 /*
      2  * Copyright 2012, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "bcc/Assert.h"
     18 #include "bcc/Renderscript/RSTransforms.h"
     19 
     20 #include <cstdlib>
     21 
     22 #include <llvm/IR/DerivedTypes.h>
     23 #include <llvm/IR/Function.h>
     24 #include <llvm/IR/Instructions.h>
     25 #include <llvm/IR/IRBuilder.h>
     26 #include <llvm/IR/MDBuilder.h>
     27 #include <llvm/IR/Module.h>
     28 #include <llvm/Pass.h>
     29 #include <llvm/Support/raw_ostream.h>
     30 #include <llvm/IR/DataLayout.h>
     31 #include <llvm/IR/Function.h>
     32 #include <llvm/IR/Type.h>
     33 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
     34 
     35 #include "bcc/Config/Config.h"
     36 #include "bcc/Renderscript/RSInfo.h"
     37 #include "bcc/Support/Log.h"
     38 
     39 #include "bcinfo/MetadataExtractor.h"
     40 
     41 using namespace bcc;
     42 
     43 namespace {
     44 
     45 /* RSForEachExpandPass - This pass operates on functions that are able to be
     46  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
     47  * ForEach-able function to be invoked over the appropriate data cells of the
     48  * input/output allocations (adjusting other relevant parameters as we go). We
     49  * support doing this for any ForEach-able compute kernels. The new function
     50  * name is the original function name followed by ".expand". Note that we
     51  * still generate code for the original function.
     52  */
     53 class RSForEachExpandPass : public llvm::ModulePass {
     54 private:
     55   static char ID;
     56 
     57   llvm::Module *M;
     58   llvm::LLVMContext *C;
     59 
     60   const RSInfo::ExportForeachFuncListTy &mFuncs;
     61 
     62   // Turns on optimization of allocation stride values.
     63   bool mEnableStepOpt;
     64 
     65   uint32_t getRootSignature(llvm::Function *F) {
     66     const llvm::NamedMDNode *ExportForEachMetadata =
     67         M->getNamedMetadata("#rs_export_foreach");
     68 
     69     if (!ExportForEachMetadata) {
     70       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
     71       for (llvm::Function::arg_iterator B = F->arg_begin(),
     72                                         E = F->arg_end();
     73            B != E;
     74            ++B) {
     75         RootArgTys.push_back(B->getType());
     76       }
     77 
     78       // For pre-ICS bitcode, we may not have signature information. In that
     79       // case, we use the size of the RootArgTys to select the number of
     80       // arguments.
     81       return (1 << RootArgTys.size()) - 1;
     82     }
     83 
     84     if (ExportForEachMetadata->getNumOperands() == 0) {
     85       return 0;
     86     }
     87 
     88     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
     89 
     90     // We only handle the case for legacy root() functions here, so this is
     91     // hard-coded to look at only the first such function.
     92     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
     93     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
     94       llvm::Value *SigVal = SigNode->getOperand(0);
     95       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
     96         llvm::StringRef SigString =
     97             static_cast<llvm::MDString*>(SigVal)->getString();
     98         uint32_t Signature = 0;
     99         if (SigString.getAsInteger(10, Signature)) {
    100           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
    101           return 0;
    102         }
    103         return Signature;
    104       }
    105     }
    106 
    107     return 0;
    108   }
    109 
    110   // Get the actual value we should use to step through an allocation.
    111   //
    112   // Normally the value we use to step through an allocation is given to us by
    113   // the driver. However, for certain primitive data types, we can derive an
    114   // integer constant for the step value. We use this integer constant whenever
    115   // possible to allow further compiler optimizations to take place.
    116   //
    117   // DL - Target Data size/layout information.
    118   // T - Type of allocation (should be a pointer).
    119   // OrigStep - Original step increment (root.expand() input from driver).
    120   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *T,
    121                             llvm::Value *OrigStep) {
    122     bccAssert(DL);
    123     bccAssert(T);
    124     bccAssert(OrigStep);
    125     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(T);
    126     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
    127     if (mEnableStepOpt && T != VoidPtrTy && PT) {
    128       llvm::Type *ET = PT->getElementType();
    129       uint64_t ETSize = DL->getTypeAllocSize(ET);
    130       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    131       return llvm::ConstantInt::get(Int32Ty, ETSize);
    132     } else {
    133       return OrigStep;
    134     }
    135   }
    136 
    137   /// @brief Returns the type of the ForEach stub parameter structure.
    138   ///
    139   /// Renderscript uses a single structure in which all parameters are passed
    140   /// to keep the signature of the expanded function independent of the
    141   /// parameters passed to it.
    142   llvm::Type *getForeachStubTy() {
    143     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*C);
    144     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    145     llvm::Type *SizeTy = Int32Ty;
    146     /* Defined in frameworks/base/libs/rs/rs_hal.h:
    147      *
    148      * struct RsForEachStubParamStruct {
    149      *   const void *in;
    150      *   void *out;
    151      *   const void *usr;
    152      *   size_t usr_len;
    153      *   uint32_t x;
    154      *   uint32_t y;
    155      *   uint32_t z;
    156      *   uint32_t lod;
    157      *   enum RsAllocationCubemapFace face;
    158      *   uint32_t ar[16];
    159      * };
    160      */
    161     llvm::SmallVector<llvm::Type*, 9> StructTys;
    162     StructTys.push_back(VoidPtrTy);  // const void *in
    163     StructTys.push_back(VoidPtrTy);  // void *out
    164     StructTys.push_back(VoidPtrTy);  // const void *usr
    165     StructTys.push_back(SizeTy);     // size_t usr_len
    166     StructTys.push_back(Int32Ty);    // uint32_t x
    167     StructTys.push_back(Int32Ty);    // uint32_t y
    168     StructTys.push_back(Int32Ty);    // uint32_t z
    169     StructTys.push_back(Int32Ty);    // uint32_t lod
    170     StructTys.push_back(Int32Ty);    // enum RsAllocationCubemapFace
    171     StructTys.push_back(llvm::ArrayType::get(Int32Ty, 16));  // uint32_t ar[16]
    172 
    173     return llvm::StructType::create(StructTys, "RsForEachStubParamStruct");
    174   }
    175 
    176   /// @brief Create skeleton of the expanded function.
    177   ///
    178   /// This creates a function with the following signature:
    179   ///
    180   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
    181   ///         uint32_t instep, uint32_t outstep)
    182   ///
    183   llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
    184     llvm::Type *ForEachStubPtrTy = getForeachStubTy()->getPointerTo();
    185     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*C);
    186 
    187     llvm::SmallVector<llvm::Type*, 8> ParamTys;
    188     ParamTys.push_back(ForEachStubPtrTy);  // const RsForEachStubParamStruct *p
    189     ParamTys.push_back(Int32Ty);           // uint32_t x1
    190     ParamTys.push_back(Int32Ty);           // uint32_t x2
    191     ParamTys.push_back(Int32Ty);           // uint32_t instep
    192     ParamTys.push_back(Int32Ty);           // uint32_t outstep
    193 
    194     llvm::FunctionType *FT =
    195         llvm::FunctionType::get(llvm::Type::getVoidTy(*C), ParamTys, false);
    196     llvm::Function *F =
    197         llvm::Function::Create(FT, llvm::GlobalValue::ExternalLinkage,
    198                                OldName + ".expand", M);
    199 
    200     llvm::Function::arg_iterator AI = F->arg_begin();
    201 
    202     AI->setName("p");
    203     AI++;
    204     AI->setName("x1");
    205     AI++;
    206     AI->setName("x2");
    207     AI++;
    208     AI->setName("arg_instep");
    209     AI++;
    210     AI->setName("arg_outstep");
    211     AI++;
    212 
    213     assert(AI == F->arg_end());
    214 
    215     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*C, "Begin", F);
    216     llvm::IRBuilder<> Builder(Begin);
    217     Builder.CreateRetVoid();
    218 
    219     return F;
    220   }
    221 
    222   /// @brief Create an empty loop
    223   ///
    224   /// Create a loop of the form:
    225   ///
    226   /// for (i = LowerBound; i < UpperBound; i++)
    227   ///   ;
    228   ///
    229   /// After the loop has been created, the builder is set such that
    230   /// instructions can be added to the loop body.
    231   ///
    232   /// @param Builder The builder to use to build this loop. The current
    233   ///                position of the builder is the position the loop
    234   ///                will be inserted.
    235   /// @param LowerBound The first value of the loop iterator
  /// @param UpperBound The exclusive upper bound of the loop iterator
    237   /// @param LoopIV A reference that will be set to the loop iterator.
    238   /// @return The BasicBlock that will be executed after the loop.
    239   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
    240                                llvm::Value *LowerBound,
    241                                llvm::Value *UpperBound,
    242                                llvm::PHINode **LoopIV) {
    243     assert(LowerBound->getType() == UpperBound->getType());
    244 
    245     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
    246     llvm::Value *Cond, *IVNext;
    247     llvm::PHINode *IV;
    248 
    249     CondBB = Builder.GetInsertBlock();
    250     AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
    251     HeaderBB = llvm::BasicBlock::Create(*C, "Loop", CondBB->getParent());
    252 
    253     // if (LowerBound < Upperbound)
    254     //   goto LoopHeader
    255     // else
    256     //   goto AfterBB
    257     CondBB->getTerminator()->eraseFromParent();
    258     Builder.SetInsertPoint(CondBB);
    259     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
    260     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
    261 
    262     // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
    263     // iv.next = iv + 1
    264     // if (iv.next < Upperbound)
    265     //   goto LoopHeader
    266     // else
    267     //   goto AfterBB
    268     Builder.SetInsertPoint(HeaderBB);
    269     IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
    270     IV->addIncoming(LowerBound, CondBB);
    271     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
    272     IV->addIncoming(IVNext, HeaderBB);
    273     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
    274     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
    275     AfterBB->setName("Exit");
    276     Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
    277     *LoopIV = IV;
    278     return AfterBB;
    279   }
    280 
    281 public:
    282   RSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
    283                       bool pEnableStepOpt)
    284       : ModulePass(ID), M(NULL), C(NULL), mFuncs(pForeachFuncs),
    285         mEnableStepOpt(pEnableStepOpt) {
    286   }
    287 
    288   /* Performs the actual optimization on a selected function. On success, the
    289    * Module will contain a new function of the name "<NAME>.expand" that
    290    * invokes <NAME>() in a loop with the appropriate parameters.
    291    */
    292   bool ExpandFunction(llvm::Function *F, uint32_t Signature) {
    293     ALOGV("Expanding ForEach-able Function %s", F->getName().str().c_str());
    294 
    295     if (!Signature) {
    296       Signature = getRootSignature(F);
    297       if (!Signature) {
    298         // We couldn't determine how to expand this function based on its
    299         // function signature.
    300         return false;
    301       }
    302     }
    303 
    304     llvm::DataLayout DL(M);
    305 
    306     llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
    307 
    308     // Create and name the actual arguments to this expanded function.
    309     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
    310     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
    311                                       E = ExpandedFunc->arg_end();
    312          B != E;
    313          ++B) {
    314       ArgVec.push_back(B);
    315     }
    316 
    317     if (ArgVec.size() != 5) {
    318       ALOGE("Incorrect number of arguments to function: %zu",
    319             ArgVec.size());
    320       return false;
    321     }
    322     llvm::Value *Arg_p = ArgVec[0];
    323     llvm::Value *Arg_x1 = ArgVec[1];
    324     llvm::Value *Arg_x2 = ArgVec[2];
    325     llvm::Value *Arg_instep = ArgVec[3];
    326     llvm::Value *Arg_outstep = ArgVec[4];
    327 
    328     llvm::Value *InStep = NULL;
    329     llvm::Value *OutStep = NULL;
    330 
    331     // Construct the actual function body.
    332     llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
    333 
    334     // Collect and construct the arguments for the kernel().
    335     // Note that we load any loop-invariant arguments before entering the Loop.
    336     llvm::Function::arg_iterator Args = F->arg_begin();
    337 
    338     llvm::Type *InTy = NULL;
    339     llvm::Value *InBasePtr = NULL;
    340     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
    341       InTy = Args->getType();
    342       InStep = getStepValue(&DL, InTy, Arg_instep);
    343       InStep->setName("instep");
    344       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
    345       Args++;
    346     }
    347 
    348     llvm::Type *OutTy = NULL;
    349     llvm::Value *OutBasePtr = NULL;
    350     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
    351       OutTy = Args->getType();
    352       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    353       OutStep->setName("outstep");
    354       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    355       Args++;
    356     }
    357 
    358     llvm::Value *UsrData = NULL;
    359     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
    360       llvm::Type *UsrDataTy = Args->getType();
    361       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
    362           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
    363       UsrData->setName("UsrData");
    364       Args++;
    365     }
    366 
    367     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    368       Args++;
    369     }
    370 
    371     llvm::Value *Y = NULL;
    372     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
    373       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    374       Args++;
    375     }
    376 
    377     bccAssert(Args == F->arg_end());
    378 
    379     llvm::PHINode *IV;
    380     createLoop(Builder, Arg_x1, Arg_x2, &IV);
    381 
    382     // Populate the actual call to kernel().
    383     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    384 
    385     llvm::Value *InPtr = NULL;
    386     llvm::Value *OutPtr = NULL;
    387 
    388     // Calculate the current input and output pointers
    389     //
    390     // We always calculate the input/output pointers with a GEP operating on i8
    391     // values and only cast at the very end to OutTy. This is because the step
    392     // between two values is given in bytes.
    393     //
    394     // TODO: We could further optimize the output by using a GEP operation of
    395     // type 'OutTy' in cases where the element type of the allocation allows.
    396     if (OutBasePtr) {
    397       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
    398       OutOffset = Builder.CreateMul(OutOffset, OutStep);
    399       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
    400       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    401     }
    402     if (InBasePtr) {
    403       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
    404       InOffset = Builder.CreateMul(InOffset, InStep);
    405       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
    406       InPtr = Builder.CreatePointerCast(InPtr, InTy);
    407     }
    408 
    409     if (InPtr) {
    410       RootArgs.push_back(InPtr);
    411     }
    412 
    413     if (OutPtr) {
    414       RootArgs.push_back(OutPtr);
    415     }
    416 
    417     if (UsrData) {
    418       RootArgs.push_back(UsrData);
    419     }
    420 
    421     llvm::Value *X = IV;
    422     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    423       RootArgs.push_back(X);
    424     }
    425 
    426     if (Y) {
    427       RootArgs.push_back(Y);
    428     }
    429 
    430     Builder.CreateCall(F, RootArgs);
    431 
    432     return true;
    433   }
    434 
    435   /* Expand a pass-by-value kernel.
    436    */
    437   bool ExpandKernel(llvm::Function *F, uint32_t Signature) {
    438     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
    439     ALOGV("Expanding kernel Function %s", F->getName().str().c_str());
    440 
    441     // TODO: Refactor this to share functionality with ExpandFunction.
    442     llvm::DataLayout DL(M);
    443 
    444     llvm::Function *ExpandedFunc = createEmptyExpandedFunction(F->getName());
    445 
    446     // Create and name the actual arguments to this expanded function.
    447     llvm::SmallVector<llvm::Argument*, 8> ArgVec;
    448     for (llvm::Function::arg_iterator B = ExpandedFunc->arg_begin(),
    449                                       E = ExpandedFunc->arg_end();
    450          B != E;
    451          ++B) {
    452       ArgVec.push_back(B);
    453     }
    454 
    455     if (ArgVec.size() != 5) {
    456       ALOGE("Incorrect number of arguments to function: %zu",
    457             ArgVec.size());
    458       return false;
    459     }
    460     llvm::Value *Arg_p = ArgVec[0];
    461     llvm::Value *Arg_x1 = ArgVec[1];
    462     llvm::Value *Arg_x2 = ArgVec[2];
    463     llvm::Value *Arg_instep = ArgVec[3];
    464     llvm::Value *Arg_outstep = ArgVec[4];
    465 
    466     llvm::Value *InStep = NULL;
    467     llvm::Value *OutStep = NULL;
    468 
    469     // Construct the actual function body.
    470     llvm::IRBuilder<> Builder(ExpandedFunc->getEntryBlock().begin());
    471 
    472     // Create TBAA meta-data.
    473     llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
    474 
    475     llvm::MDBuilder MDHelper(*C);
    476     TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
    477     TBAAAllocation = MDHelper.createTBAANode("allocation", TBAARenderScript);
    478     TBAAPointer = MDHelper.createTBAANode("pointer", TBAARenderScript);
    479 
    480     // Collect and construct the arguments for the kernel().
    481     // Note that we load any loop-invariant arguments before entering the Loop.
    482     llvm::Function::arg_iterator Args = F->arg_begin();
    483 
    484     llvm::Type *OutTy = NULL;
    485     bool PassOutByReference = false;
    486     llvm::LoadInst *OutBasePtr = NULL;
    487     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
    488       llvm::Type *OutBaseTy = F->getReturnType();
    489       if (OutBaseTy->isVoidTy()) {
    490         PassOutByReference = true;
    491         OutTy = Args->getType();
    492         Args++;
    493       } else {
    494         OutTy = OutBaseTy->getPointerTo();
    495         // We don't increment Args, since we are using the actual return type.
    496       }
    497       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    498       OutStep->setName("outstep");
    499       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    500       OutBasePtr->setMetadata("tbaa", TBAAPointer);
    501     }
    502 
    503     llvm::Type *InBaseTy = NULL;
    504     llvm::Type *InTy = NULL;
    505     llvm::LoadInst *InBasePtr = NULL;
    506     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
    507       InBaseTy = Args->getType();
    508       InTy =InBaseTy->getPointerTo();
    509       InStep = getStepValue(&DL, InTy, Arg_instep);
    510       InStep->setName("instep");
    511       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
    512       InBasePtr->setMetadata("tbaa", TBAAPointer);
    513       Args++;
    514     }
    515 
    516     // No usrData parameter on kernels.
    517     bccAssert(
    518         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
    519 
    520     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    521       Args++;
    522     }
    523 
    524     llvm::Value *Y = NULL;
    525     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
    526       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    527       Args++;
    528     }
    529 
    530     bccAssert(Args == F->arg_end());
    531 
    532     llvm::PHINode *IV;
    533     createLoop(Builder, Arg_x1, Arg_x2, &IV);
    534 
    535     // Populate the actual call to kernel().
    536     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    537 
    538     llvm::Value *InPtr = NULL;
    539     llvm::Value *OutPtr = NULL;
    540 
    541     // Calculate the current input and output pointers
    542     //
    543     // We always calculate the input/output pointers with a GEP operating on i8
    544     // values and only cast at the very end to OutTy. This is because the step
    545     // between two values is given in bytes.
    546     //
    547     // TODO: We could further optimize the output by using a GEP operation of
    548     // type 'OutTy' in cases where the element type of the allocation allows.
    549     if (OutBasePtr) {
    550       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
    551       OutOffset = Builder.CreateMul(OutOffset, OutStep);
    552       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
    553       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    554     }
    555     if (InBasePtr) {
    556       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
    557       InOffset = Builder.CreateMul(InOffset, InStep);
    558       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
    559       InPtr = Builder.CreatePointerCast(InPtr, InTy);
    560     }
    561 
    562     if (PassOutByReference) {
    563       RootArgs.push_back(OutPtr);
    564     }
    565 
    566     if (InPtr) {
    567       llvm::LoadInst *In = Builder.CreateLoad(InPtr, "In");
    568       In->setMetadata("tbaa", TBAAAllocation);
    569       RootArgs.push_back(In);
    570     }
    571 
    572     llvm::Value *X = IV;
    573     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    574       RootArgs.push_back(X);
    575     }
    576 
    577     if (Y) {
    578       RootArgs.push_back(Y);
    579     }
    580 
    581     llvm::Value *RetVal = Builder.CreateCall(F, RootArgs);
    582 
    583     if (OutPtr && !PassOutByReference) {
    584       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
    585       Store->setMetadata("tbaa", TBAAAllocation);
    586     }
    587 
    588     return true;
    589   }
    590 
    591   /// @brief Checks if pointers to allocation internals are exposed
    592   ///
    593   /// This function verifies if through the parameters passed to the kernel
    594   /// or through calls to the runtime library the script gains access to
    595   /// pointers pointing to data within a RenderScript Allocation.
    596   /// If we know we control all loads from and stores to data within
    597   /// RenderScript allocations and if we know the run-time internal accesses
    598   /// are all annotated with RenderScript TBAA metadata, only then we
    599   /// can safely use TBAA to distinguish between generic and from-allocation
    600   /// pointers.
    601   bool allocPointersExposed(llvm::Module &M) {
    602     // Old style kernel function can expose pointers to elements within
    603     // allocations.
    604     // TODO: Extend analysis to allow simple cases of old-style kernels.
    605     for (RSInfo::ExportForeachFuncListTy::const_iterator
    606              func_iter = mFuncs.begin(), func_end = mFuncs.end();
    607          func_iter != func_end; func_iter++) {
    608       const char *Name = func_iter->first;
    609       uint32_t Signature = func_iter->second;
    610       if (M.getFunction(Name) &&
    611           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
    612         return true;
    613       }
    614     }
    615 
    616     // Check for library functions that expose a pointer to an Allocation or
    617     // that are not yet annotated with RenderScript-specific tbaa information.
    618     static std::vector<std::string> Funcs;
    619 
    620     // rsGetElementAt(...)
    621     Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
    622     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
    623     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
    624     // rsSetElementAt()
    625     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
    626     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
    627     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
    628     // rsGetElementAtYuv_uchar_Y()
    629     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
    630     // rsGetElementAtYuv_uchar_U()
    631     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
    632     // rsGetElementAtYuv_uchar_V()
    633     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
    634 
    635     for (std::vector<std::string>::iterator FI = Funcs.begin(),
    636                                             FE = Funcs.end();
    637          FI != FE; ++FI) {
    638       llvm::Function *F = M.getFunction(*FI);
    639 
    640       if (!F) {
    641         ALOGE("Missing run-time function '%s'", FI->c_str());
    642         return true;
    643       }
    644 
    645       if (F->getNumUses() > 0) {
    646         return true;
    647       }
    648     }
    649 
    650     return false;
    651   }
    652 
    653   /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
    654   ///
  /// The TBAA metadata used to annotate loads/stores from RenderScript
  /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
  /// root node. LLVM assumes may-alias for all nodes in unrelated alias
  /// analysis trees. This function makes the RenderScript TBAA tree a subtree of
  /// the normal C/C++ TBAA tree, alongside the normal C/C++ types. With the
  /// trees connected, every access to an Allocation is resolved to no-alias when
  /// compared to a normal C/C++ access.
    662   void connectRenderScriptTBAAMetadata(llvm::Module &M) {
    663     llvm::MDBuilder MDHelper(*C);
    664     llvm::MDNode *TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
    665 
    666     llvm::MDNode *TBAARoot = MDHelper.createTBAARoot("Simple C/C++ TBAA");
    667     llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript", TBAARoot);
    668 
    669     TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
    670   }
    671 
    672   virtual bool runOnModule(llvm::Module &M) {
    673     bool Changed = false;
    674     this->M = &M;
    675     C = &M.getContext();
    676 
    677     bool AllocsExposed = allocPointersExposed(M);
    678 
    679     for (RSInfo::ExportForeachFuncListTy::const_iterator
    680              func_iter = mFuncs.begin(), func_end = mFuncs.end();
    681          func_iter != func_end; func_iter++) {
    682       const char *name = func_iter->first;
    683       uint32_t signature = func_iter->second;
    684       llvm::Function *kernel = M.getFunction(name);
    685       if (kernel) {
    686         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
    687           Changed |= ExpandKernel(kernel, signature);
    688           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
    689         } else if (kernel->getReturnType()->isVoidTy()) {
    690           Changed |= ExpandFunction(kernel, signature);
    691           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
    692         } else {
    693           // There are some graphics root functions that are not
    694           // expanded, but that will be called directly. For those
    695           // functions, we can not set the linkage to internal.
    696         }
    697       }
    698     }
    699 
    700     if (!AllocsExposed) {
    701       connectRenderScriptTBAAMetadata(M);
    702     }
    703 
    704     return Changed;
    705   }
    706 
    707   virtual const char *getPassName() const {
    708     return "ForEach-able Function Expansion";
    709   }
    710 
    711 }; // end RSForEachExpandPass
    712 
    713 } // end anonymous namespace
    714 
// Definition of the static pass identifier declared in RSForEachExpandPass. It
// is the value handed to the llvm::ModulePass base-class constructor.
char RSForEachExpandPass::ID = 0;
    716 
    717 namespace bcc {
    718 
    719 llvm::ModulePass *
    720 createRSForEachExpandPass(const RSInfo::ExportForeachFuncListTy &pForeachFuncs,
    721                           bool pEnableStepOpt){
    722   return new RSForEachExpandPass(pForeachFuncs, pEnableStepOpt);
    723 }
    724 
    725 } // end namespace bcc
    726