Home | History | Annotate | Download | only in Renderscript
      1 /*
      2  * Copyright 2012, The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *     http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "bcc/Assert.h"
     18 #include "bcc/Renderscript/RSTransforms.h"
     19 
     20 #include <cstdlib>
     21 
     22 #include <llvm/IR/DerivedTypes.h>
     23 #include <llvm/IR/Function.h>
     24 #include <llvm/IR/Instructions.h>
     25 #include <llvm/IR/IRBuilder.h>
     26 #include <llvm/IR/MDBuilder.h>
     27 #include <llvm/IR/Module.h>
     28 #include <llvm/Pass.h>
     29 #include <llvm/Support/raw_ostream.h>
     30 #include <llvm/IR/DataLayout.h>
     31 #include <llvm/IR/Function.h>
     32 #include <llvm/IR/Type.h>
     33 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
     34 
     35 #include "bcc/Config/Config.h"
     36 #include "bcc/Support/Log.h"
     37 
     38 #include "bcinfo/MetadataExtractor.h"
     39 
     40 #define NUM_EXPANDED_FUNCTION_PARAMS 5
     41 
     42 using namespace bcc;
     43 
     44 namespace {
     45 
     46 static const bool gEnableRsTbaa = true;
     47 
     48 /* RSForEachExpandPass - This pass operates on functions that are able to be
     49  * called via rsForEach() or "foreach_<NAME>". We create an inner loop for the
     50  * ForEach-able function to be invoked over the appropriate data cells of the
     51  * input/output allocations (adjusting other relevant parameters as we go). We
     52  * support doing this for any ForEach-able compute kernels. The new function
     53  * name is the original function name followed by ".expand". Note that we
     54  * still generate code for the original function.
     55  */
     56 class RSForEachExpandPass : public llvm::ModulePass {
     57 private:
     58   static char ID;
     59 
     60   llvm::Module *Module;
     61   llvm::LLVMContext *Context;
     62 
     63   /*
     64    * Pointer to LLVM type information for the ForEachStubType and the function
     65    * signature for expanded kernels.  These must be re-calculated for each
     66    * module the pass is run on.
     67    */
     68   llvm::StructType   *ForEachStubType;
     69   llvm::FunctionType *ExpandedFunctionType;
     70 
     71   uint32_t mExportForEachCount;
     72   const char **mExportForEachNameList;
     73   const uint32_t *mExportForEachSignatureList;
     74 
     75   // Turns on optimization of allocation stride values.
     76   bool mEnableStepOpt;
     77 
     78   uint32_t getRootSignature(llvm::Function *Function) {
     79     const llvm::NamedMDNode *ExportForEachMetadata =
     80         Module->getNamedMetadata("#rs_export_foreach");
     81 
     82     if (!ExportForEachMetadata) {
     83       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
     84       for (llvm::Function::arg_iterator B = Function->arg_begin(),
     85                                         E = Function->arg_end();
     86            B != E;
     87            ++B) {
     88         RootArgTys.push_back(B->getType());
     89       }
     90 
     91       // For pre-ICS bitcode, we may not have signature information. In that
     92       // case, we use the size of the RootArgTys to select the number of
     93       // arguments.
     94       return (1 << RootArgTys.size()) - 1;
     95     }
     96 
     97     if (ExportForEachMetadata->getNumOperands() == 0) {
     98       return 0;
     99     }
    100 
    101     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
    102 
    103     // We only handle the case for legacy root() functions here, so this is
    104     // hard-coded to look at only the first such function.
    105     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
    106     if (SigNode != NULL && SigNode->getNumOperands() == 1) {
    107       llvm::Value *SigVal = SigNode->getOperand(0);
    108       if (SigVal->getValueID() == llvm::Value::MDStringVal) {
    109         llvm::StringRef SigString =
    110             static_cast<llvm::MDString*>(SigVal)->getString();
    111         uint32_t Signature = 0;
    112         if (SigString.getAsInteger(10, Signature)) {
    113           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
    114           return 0;
    115         }
    116         return Signature;
    117       }
    118     }
    119 
    120     return 0;
    121   }
    122 
    123   // Get the actual value we should use to step through an allocation.
    124   //
    125   // Normally the value we use to step through an allocation is given to us by
    126   // the driver. However, for certain primitive data types, we can derive an
    127   // integer constant for the step value. We use this integer constant whenever
    128   // possible to allow further compiler optimizations to take place.
    129   //
    130   // DL - Target Data size/layout information.
    131   // T - Type of allocation (should be a pointer).
    132   // OrigStep - Original step increment (root.expand() input from driver).
    133   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
    134                             llvm::Value *OrigStep) {
    135     bccAssert(DL);
    136     bccAssert(AllocType);
    137     bccAssert(OrigStep);
    138     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
    139     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
    140     if (mEnableStepOpt && AllocType != VoidPtrTy && PT) {
    141       llvm::Type *ET = PT->getElementType();
    142       uint64_t ETSize = DL->getTypeAllocSize(ET);
    143       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
    144       return llvm::ConstantInt::get(Int32Ty, ETSize);
    145     } else {
    146       return OrigStep;
    147     }
    148   }
    149 
    150   /// @brief Builds the types required by the pass for the given context.
    151   void buildTypes(void) {
    152     // Create the RsForEachStubParam struct.
    153 
    154     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
    155     llvm::Type *Int32Ty   = llvm::Type::getInt32Ty(*Context);
    156     /* Defined in frameworks/base/libs/rs/rs_hal.h:
    157      *
    158      * struct RsForEachStubParamStruct {
    159      *   const void *in;
    160      *   void *out;
    161      *   const void *usr;
    162      *   uint32_t usr_len;
    163      *   uint32_t x;
    164      *   uint32_t y;
    165      *   uint32_t z;
    166      *   uint32_t lod;
    167      *   enum RsAllocationCubemapFace face;
    168      *   uint32_t ar[16];
    169      *   const void **ins;
    170      *   uint32_t *eStrideIns;
    171      * };
    172      */
    173     llvm::SmallVector<llvm::Type*, 16> StructTypes;
    174     StructTypes.push_back(VoidPtrTy);  // const void *in
    175     StructTypes.push_back(VoidPtrTy);  // void *out
    176     StructTypes.push_back(VoidPtrTy);  // const void *usr
    177     StructTypes.push_back(Int32Ty);    // uint32_t usr_len
    178     StructTypes.push_back(Int32Ty);    // uint32_t x
    179     StructTypes.push_back(Int32Ty);    // uint32_t y
    180     StructTypes.push_back(Int32Ty);    // uint32_t z
    181     StructTypes.push_back(Int32Ty);    // uint32_t lod
    182     StructTypes.push_back(Int32Ty);    // enum RsAllocationCubemapFace
    183     StructTypes.push_back(llvm::ArrayType::get(Int32Ty, 16)); // uint32_t ar[16]
    184 
    185     StructTypes.push_back(llvm::PointerType::getUnqual(VoidPtrTy)); // const void **ins
    186     StructTypes.push_back(Int32Ty->getPointerTo()); // uint32_t *eStrideIns
    187 
    188     ForEachStubType =
    189       llvm::StructType::create(StructTypes, "RsForEachStubParamStruct");
    190 
    191     // Create the function type for expanded kernels.
    192 
    193     llvm::Type *ForEachStubPtrTy = ForEachStubType->getPointerTo();
    194 
    195     llvm::SmallVector<llvm::Type*, 8> ParamTypes;
    196     ParamTypes.push_back(ForEachStubPtrTy); // const RsForEachStubParamStruct *p
    197     ParamTypes.push_back(Int32Ty);          // uint32_t x1
    198     ParamTypes.push_back(Int32Ty);          // uint32_t x2
    199     ParamTypes.push_back(Int32Ty);          // uint32_t instep
    200     ParamTypes.push_back(Int32Ty);          // uint32_t outstep
    201 
    202     ExpandedFunctionType = llvm::FunctionType::get(llvm::Type::getVoidTy(*Context),
    203                                               ParamTypes,
    204                                               false);
    205   }
    206 
    207   /// @brief Create skeleton of the expanded function.
    208   ///
    209   /// This creates a function with the following signature:
    210   ///
    211   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
    212   ///         uint32_t instep, uint32_t outstep)
    213   ///
    214   llvm::Function *createEmptyExpandedFunction(llvm::StringRef OldName) {
    215     llvm::Function *ExpandedFunction =
    216       llvm::Function::Create(ExpandedFunctionType,
    217                              llvm::GlobalValue::ExternalLinkage,
    218                              OldName + ".expand", Module);
    219 
    220     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
    221 
    222     llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
    223 
    224     (AI++)->setName("p");
    225     (AI++)->setName("x1");
    226     (AI++)->setName("x2");
    227     (AI++)->setName("arg_instep");
    228     (AI++)->setName("arg_outstep");
    229 
    230     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
    231                                                        ExpandedFunction);
    232     llvm::IRBuilder<> Builder(Begin);
    233     Builder.CreateRetVoid();
    234 
    235     return ExpandedFunction;
    236   }
    237 
    238   /// @brief Create an empty loop
    239   ///
    240   /// Create a loop of the form:
    241   ///
    242   /// for (i = LowerBound; i < UpperBound; i++)
    243   ///   ;
    244   ///
    245   /// After the loop has been created, the builder is set such that
    246   /// instructions can be added to the loop body.
    247   ///
    248   /// @param Builder The builder to use to build this loop. The current
    249   ///                position of the builder is the position the loop
    250   ///                will be inserted.
    251   /// @param LowerBound The first value of the loop iterator
    252   /// @param UpperBound The maximal value of the loop iterator
    253   /// @param LoopIV A reference that will be set to the loop iterator.
    254   /// @return The BasicBlock that will be executed after the loop.
    255   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
    256                                llvm::Value *LowerBound,
    257                                llvm::Value *UpperBound,
    258                                llvm::PHINode **LoopIV) {
    259     assert(LowerBound->getType() == UpperBound->getType());
    260 
    261     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
    262     llvm::Value *Cond, *IVNext;
    263     llvm::PHINode *IV;
    264 
    265     CondBB = Builder.GetInsertBlock();
    266     AfterBB = llvm::SplitBlock(CondBB, Builder.GetInsertPoint(), this);
    267     HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
    268 
    269     // if (LowerBound < Upperbound)
    270     //   goto LoopHeader
    271     // else
    272     //   goto AfterBB
    273     CondBB->getTerminator()->eraseFromParent();
    274     Builder.SetInsertPoint(CondBB);
    275     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
    276     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
    277 
    278     // iv = PHI [CondBB -> LowerBound], [LoopHeader -> NextIV ]
    279     // iv.next = iv + 1
    280     // if (iv.next < Upperbound)
    281     //   goto LoopHeader
    282     // else
    283     //   goto AfterBB
    284     Builder.SetInsertPoint(HeaderBB);
    285     IV = Builder.CreatePHI(LowerBound->getType(), 2, "X");
    286     IV->addIncoming(LowerBound, CondBB);
    287     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
    288     IV->addIncoming(IVNext, HeaderBB);
    289     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
    290     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
    291     AfterBB->setName("Exit");
    292     Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
    293     *LoopIV = IV;
    294     return AfterBB;
    295   }
    296 
    297 public:
    298   RSForEachExpandPass(bool pEnableStepOpt)
    299       : ModulePass(ID), Module(NULL), Context(NULL),
    300         mEnableStepOpt(pEnableStepOpt) {
    301 
    302   }
    303 
    304   /* Performs the actual optimization on a selected function. On success, the
    305    * Module will contain a new function of the name "<NAME>.expand" that
    306    * invokes <NAME>() in a loop with the appropriate parameters.
    307    */
    308   bool ExpandFunction(llvm::Function *Function, uint32_t Signature) {
    309     ALOGV("Expanding ForEach-able Function %s",
    310           Function->getName().str().c_str());
    311 
    312     if (!Signature) {
    313       Signature = getRootSignature(Function);
    314       if (!Signature) {
    315         // We couldn't determine how to expand this function based on its
    316         // function signature.
    317         return false;
    318       }
    319     }
    320 
    321     llvm::DataLayout DL(Module);
    322 
    323     llvm::Function *ExpandedFunction =
    324       createEmptyExpandedFunction(Function->getName());
    325 
    326     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
    327 
    328     /*
    329      * Extract the expanded function's parameters.  It is guaranteed by
    330      * createEmptyExpandedFunction that there will be five parameters.
    331      */
    332     llvm::Function::arg_iterator ExpandedFunctionArgIter =
    333       ExpandedFunction->arg_begin();
    334 
    335     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    336     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    337     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    338     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
    339     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
    340 
    341     llvm::Value *InStep  = NULL;
    342     llvm::Value *OutStep = NULL;
    343 
    344     // Construct the actual function body.
    345     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
    346 
    347     // Collect and construct the arguments for the kernel().
    348     // Note that we load any loop-invariant arguments before entering the Loop.
    349     llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
    350 
    351     llvm::Type *InTy = NULL;
    352     llvm::Value *InBasePtr = NULL;
    353     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
    354       InTy = (FunctionArgIter++)->getType();
    355       InStep = getStepValue(&DL, InTy, Arg_instep);
    356       InStep->setName("instep");
    357       InBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 0));
    358     }
    359 
    360     llvm::Type *OutTy = NULL;
    361     llvm::Value *OutBasePtr = NULL;
    362     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
    363       OutTy = (FunctionArgIter++)->getType();
    364       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    365       OutStep->setName("outstep");
    366       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    367     }
    368 
    369     llvm::Value *UsrData = NULL;
    370     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
    371       llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
    372       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(
    373           Builder.CreateStructGEP(Arg_p, 2)), UsrDataTy);
    374       UsrData->setName("UsrData");
    375     }
    376 
    377     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    378       FunctionArgIter++;
    379     }
    380 
    381     llvm::Value *Y = NULL;
    382     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
    383       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    384       FunctionArgIter++;
    385     }
    386 
    387     bccAssert(FunctionArgIter == Function->arg_end());
    388 
    389     llvm::PHINode *IV;
    390     createLoop(Builder, Arg_x1, Arg_x2, &IV);
    391 
    392     // Populate the actual call to kernel().
    393     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    394 
    395     llvm::Value *InPtr  = NULL;
    396     llvm::Value *OutPtr = NULL;
    397 
    398     // Calculate the current input and output pointers
    399     //
    400     // We always calculate the input/output pointers with a GEP operating on i8
    401     // values and only cast at the very end to OutTy. This is because the step
    402     // between two values is given in bytes.
    403     //
    404     // TODO: We could further optimize the output by using a GEP operation of
    405     // type 'OutTy' in cases where the element type of the allocation allows.
    406     if (OutBasePtr) {
    407       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
    408       OutOffset = Builder.CreateMul(OutOffset, OutStep);
    409       OutPtr = Builder.CreateGEP(OutBasePtr, OutOffset);
    410       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
    411     }
    412 
    413     if (InBasePtr) {
    414       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
    415       InOffset = Builder.CreateMul(InOffset, InStep);
    416       InPtr = Builder.CreateGEP(InBasePtr, InOffset);
    417       InPtr = Builder.CreatePointerCast(InPtr, InTy);
    418     }
    419 
    420     if (InPtr) {
    421       RootArgs.push_back(InPtr);
    422     }
    423 
    424     if (OutPtr) {
    425       RootArgs.push_back(OutPtr);
    426     }
    427 
    428     if (UsrData) {
    429       RootArgs.push_back(UsrData);
    430     }
    431 
    432     llvm::Value *X = IV;
    433     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    434       RootArgs.push_back(X);
    435     }
    436 
    437     if (Y) {
    438       RootArgs.push_back(Y);
    439     }
    440 
    441     Builder.CreateCall(Function, RootArgs);
    442 
    443     return true;
    444   }
    445 
    446   /* Expand a pass-by-value kernel.
    447    */
    448   bool ExpandKernel(llvm::Function *Function, uint32_t Signature) {
    449     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
    450     ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
    451 
    452     // TODO: Refactor this to share functionality with ExpandFunction.
    453     llvm::DataLayout DL(Module);
    454 
    455     llvm::Function *ExpandedFunction =
    456       createEmptyExpandedFunction(Function->getName());
    457 
    458     /*
    459      * Extract the expanded function's parameters.  It is guaranteed by
    460      * createEmptyExpandedFunction that there will be five parameters.
    461      */
    462 
    463     bccAssert(ExpandedFunction->arg_size() == NUM_EXPANDED_FUNCTION_PARAMS);
    464 
    465     llvm::Function::arg_iterator ExpandedFunctionArgIter =
    466       ExpandedFunction->arg_begin();
    467 
    468     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
    469     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
    470     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
    471     llvm::Value *Arg_instep  = &*(ExpandedFunctionArgIter++);
    472     llvm::Value *Arg_outstep = &*ExpandedFunctionArgIter;
    473 
    474     // Construct the actual function body.
    475     llvm::IRBuilder<> Builder(ExpandedFunction->getEntryBlock().begin());
    476 
    477     // Create TBAA meta-data.
    478     llvm::MDNode *TBAARenderScript, *TBAAAllocation, *TBAAPointer;
    479     llvm::MDBuilder MDHelper(*Context);
    480 
    481     TBAARenderScript = MDHelper.createTBAARoot("RenderScript TBAA");
    482     TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation", TBAARenderScript);
    483     TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation, TBAAAllocation, 0);
    484     TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer", TBAARenderScript);
    485     TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
    486 
    487     /*
    488      * Collect and construct the arguments for the kernel().
    489      *
    490      * Note that we load any loop-invariant arguments before entering the Loop.
    491      */
    492     size_t NumInputs = Function->arg_size();
    493 
    494     llvm::Value *Y = NULL;
    495     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
    496       Y = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 5), "Y");
    497       --NumInputs;
    498     }
    499 
    500     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    501       --NumInputs;
    502     }
    503 
    504     // No usrData parameter on kernels.
    505     bccAssert(
    506         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
    507 
    508     llvm::Function::arg_iterator ArgIter = Function->arg_begin();
    509 
    510     // Check the return type
    511     llvm::Type     *OutTy      = NULL;
    512     llvm::Value    *OutStep    = NULL;
    513     llvm::LoadInst *OutBasePtr = NULL;
    514 
    515     bool PassOutByReference = false;
    516 
    517     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
    518       llvm::Type *OutBaseTy = Function->getReturnType();
    519 
    520       if (OutBaseTy->isVoidTy()) {
    521         PassOutByReference = true;
    522         OutTy = ArgIter->getType();
    523 
    524         ArgIter++;
    525         --NumInputs;
    526       } else {
    527         // We don't increment Args, since we are using the actual return type.
    528         OutTy = OutBaseTy->getPointerTo();
    529       }
    530 
    531       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
    532       OutStep->setName("outstep");
    533       OutBasePtr = Builder.CreateLoad(Builder.CreateStructGEP(Arg_p, 1));
    534       if (gEnableRsTbaa) {
    535         OutBasePtr->setMetadata("tbaa", TBAAPointer);
    536       }
    537     }
    538 
    539     llvm::SmallVector<llvm::Type*,     8> InTypes;
    540     llvm::SmallVector<llvm::Value*,    8> InSteps;
    541     llvm::SmallVector<llvm::LoadInst*, 8> InBasePtrs;
    542     llvm::SmallVector<bool,            8> InIsStructPointer;
    543 
    544     if (NumInputs == 1) {
    545       llvm::Type *InType = ArgIter->getType();
    546 
    547       /*
    548        * AArch64 calling dictate that structs of sufficient size get passed by
    549        * poiter instead of passed by value.  This, combined with the fact that
    550        * we don't allow kernels to operate on pointer data means that if we see
    551        * a kernel with a pointer parameter we know that it is struct input that
    552        * has been promoted.  As such we don't need to convert its type to a
    553        * pointer.  Later we will need to know to avoid a load, so we save this
    554        * information in InIsStructPointer.
    555        */
    556       if (!InType->isPointerTy()) {
    557         InType = InType->getPointerTo();
    558         InIsStructPointer.push_back(false);
    559       } else {
    560         InIsStructPointer.push_back(true);
    561       }
    562 
    563       llvm::Value *InStep = getStepValue(&DL, InType, Arg_instep);
    564 
    565       InStep->setName("instep");
    566 
    567       llvm::Value    *Input     = Builder.CreateStructGEP(Arg_p, 0);
    568       llvm::LoadInst *InBasePtr = Builder.CreateLoad(Input, "input_base");
    569 
    570       if (gEnableRsTbaa) {
    571         InBasePtr->setMetadata("tbaa", TBAAPointer);
    572       }
    573 
    574       InTypes.push_back(InType);
    575       InSteps.push_back(InStep);
    576       InBasePtrs.push_back(InBasePtr);
    577 
    578     } else if (NumInputs > 1) {
    579       llvm::Value    *InsMember  = Builder.CreateStructGEP(Arg_p, 10);
    580       llvm::LoadInst *InsBasePtr = Builder.CreateLoad(InsMember,
    581                                                       "inputs_base");
    582 
    583       llvm::Value    *InStepsMember = Builder.CreateStructGEP(Arg_p, 11);
    584       llvm::LoadInst *InStepsBase   = Builder.CreateLoad(InStepsMember,
    585                                                          "insteps_base");
    586 
    587       for (size_t InputIndex = 0; InputIndex < NumInputs;
    588            ++InputIndex, ArgIter++) {
    589 
    590           llvm::Value *IndexVal = Builder.getInt32(InputIndex);
    591 
    592           llvm::Value    *InStepAddr = Builder.CreateGEP(InStepsBase, IndexVal);
    593           llvm::LoadInst *InStepArg  = Builder.CreateLoad(InStepAddr,
    594                                                           "instep_addr");
    595 
    596           llvm::Type *InType = ArgIter->getType();
    597 
    598           /*
    599          * AArch64 calling dictate that structs of sufficient size get passed by
    600          * poiter instead of passed by value.  This, combined with the fact that
    601          * we don't allow kernels to operate on pointer data means that if we
    602          * see a kernel with a pointer parameter we know that it is struct input
    603          * that has been promoted.  As such we don't need to convert its type to
    604          * a pointer.  Later we will need to know to avoid a load, so we save
    605          * this information in InIsStructPointer.
    606          */
    607           if (!InType->isPointerTy()) {
    608             InType = InType->getPointerTo();
    609             InIsStructPointer.push_back(false);
    610           } else {
    611             InIsStructPointer.push_back(true);
    612           }
    613 
    614           llvm::Value *InStep = getStepValue(&DL, InType, InStepArg);
    615 
    616           InStep->setName("instep");
    617 
    618           llvm::Value    *InputAddr = Builder.CreateGEP(InsBasePtr, IndexVal);
    619           llvm::LoadInst *InBasePtr = Builder.CreateLoad(InputAddr,
    620                                                          "input_base");
    621 
    622           if (gEnableRsTbaa) {
    623             InBasePtr->setMetadata("tbaa", TBAAPointer);
    624           }
    625 
    626           InTypes.push_back(InType);
    627           InSteps.push_back(InStep);
    628           InBasePtrs.push_back(InBasePtr);
    629       }
    630     }
    631 
    632     llvm::PHINode *IV;
    633     createLoop(Builder, Arg_x1, Arg_x2, &IV);
    634 
    635     // Populate the actual call to kernel().
    636     llvm::SmallVector<llvm::Value*, 8> RootArgs;
    637 
    638     // Calculate the current input and output pointers
    639     //
    640     //
    641     // We always calculate the input/output pointers with a GEP operating on i8
    642     // values combined with a multiplication and only cast at the very end to
    643     // OutTy.  This is to account for dynamic stepping sizes when the value
    644     // isn't apparent at compile time.  In the (very common) case when we know
    645     // the step size at compile time, due to haveing complete type information
    646     // this multiplication will optmized out and produces code equivalent to a
    647     // a GEP on a pointer of the correct type.
    648 
    649     // Output
    650 
    651     llvm::Value *OutPtr = NULL;
    652     if (OutBasePtr) {
    653       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
    654 
    655       OutOffset = Builder.CreateMul(OutOffset, OutStep);
    656       OutPtr    = Builder.CreateGEP(OutBasePtr, OutOffset);
    657       OutPtr    = Builder.CreatePointerCast(OutPtr, OutTy);
    658 
    659       if (PassOutByReference) {
    660         RootArgs.push_back(OutPtr);
    661       }
    662     }
    663 
    664     // Inputs
    665 
    666     if (NumInputs > 0) {
    667       llvm::Value *Offset = Builder.CreateSub(IV, Arg_x1);
    668 
    669       for (size_t Index = 0; Index < NumInputs; ++Index) {
    670         llvm::Value *InOffset = Builder.CreateMul(Offset, InSteps[Index]);
    671         llvm::Value *InPtr    = Builder.CreateGEP(InBasePtrs[Index], InOffset);
    672 
    673         InPtr = Builder.CreatePointerCast(InPtr, InTypes[Index]);
    674 
    675         llvm::Value *Input;
    676 
    677         if (InIsStructPointer[Index]) {
    678           Input = InPtr;
    679 
    680         } else {
    681           llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
    682 
    683           if (gEnableRsTbaa) {
    684             InputLoad->setMetadata("tbaa", TBAAAllocation);
    685           }
    686 
    687           Input = InputLoad;
    688         }
    689 
    690         RootArgs.push_back(Input);
    691       }
    692     }
    693 
    694     llvm::Value *X = IV;
    695     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
    696       RootArgs.push_back(X);
    697     }
    698 
    699     if (Y) {
    700       RootArgs.push_back(Y);
    701     }
    702 
    703     llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
    704 
    705     if (OutPtr && !PassOutByReference) {
    706       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
    707       if (gEnableRsTbaa) {
    708         Store->setMetadata("tbaa", TBAAAllocation);
    709       }
    710     }
    711 
    712     return true;
    713   }
    714 
    715   /// @brief Checks if pointers to allocation internals are exposed
    716   ///
    717   /// This function verifies if through the parameters passed to the kernel
    718   /// or through calls to the runtime library the script gains access to
    719   /// pointers pointing to data within a RenderScript Allocation.
    720   /// If we know we control all loads from and stores to data within
    721   /// RenderScript allocations and if we know the run-time internal accesses
    722   /// are all annotated with RenderScript TBAA metadata, only then we
    723   /// can safely use TBAA to distinguish between generic and from-allocation
    724   /// pointers.
    725   bool allocPointersExposed(llvm::Module &Module) {
    726     // Old style kernel function can expose pointers to elements within
    727     // allocations.
    728     // TODO: Extend analysis to allow simple cases of old-style kernels.
    729     for (size_t i = 0; i < mExportForEachCount; ++i) {
    730       const char *Name = mExportForEachNameList[i];
    731       uint32_t Signature = mExportForEachSignatureList[i];
    732       if (Module.getFunction(Name) &&
    733           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
    734         return true;
    735       }
    736     }
    737 
    738     // Check for library functions that expose a pointer to an Allocation or
    739     // that are not yet annotated with RenderScript-specific tbaa information.
    740     static std::vector<std::string> Funcs;
    741 
    742     // rsGetElementAt(...)
    743     Funcs.push_back("_Z14rsGetElementAt13rs_allocationj");
    744     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjj");
    745     Funcs.push_back("_Z14rsGetElementAt13rs_allocationjjj");
    746     // rsSetElementAt()
    747     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvj");
    748     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjj");
    749     Funcs.push_back("_Z14rsSetElementAt13rs_allocationPvjjj");
    750     // rsGetElementAtYuv_uchar_Y()
    751     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj");
    752     // rsGetElementAtYuv_uchar_U()
    753     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj");
    754     // rsGetElementAtYuv_uchar_V()
    755     Funcs.push_back("_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj");
    756 
    757     for (std::vector<std::string>::iterator FI = Funcs.begin(),
    758                                             FE = Funcs.end();
    759          FI != FE; ++FI) {
    760       llvm::Function *Function = Module.getFunction(*FI);
    761 
    762       if (!Function) {
    763         ALOGE("Missing run-time function '%s'", FI->c_str());
    764         return true;
    765       }
    766 
    767       if (Function->getNumUses() > 0) {
    768         return true;
    769       }
    770     }
    771 
    772     return false;
    773   }
    774 
    775   /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
    776   ///
    777   /// The TBAA metadata used to annotate loads/stores from RenderScript
    778   /// Allocations is generated in a separate TBAA tree with a "RenderScript TBAA"
    779   /// root node. LLVM does assume may-alias for all nodes in unrelated alias
    780   /// analysis trees. This function makes the RenderScript TBAA a subtree of the
    781   /// normal C/C++ TBAA tree aside of normal C/C++ types. With the connected trees
    782   /// every access to an Allocation is resolved to must-alias if compared to
    783   /// a normal C/C++ access.
    784   void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
    785     llvm::MDBuilder MDHelper(*Context);
    786     llvm::MDNode *TBAARenderScript =
    787       MDHelper.createTBAARoot("RenderScript TBAA");
    788 
    789     llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
    790     llvm::MDNode *TBAAMergedRS = MDHelper.createTBAANode("RenderScript",
    791                                                          TBAARoot);
    792 
    793     TBAARenderScript->replaceAllUsesWith(TBAAMergedRS);
    794   }
    795 
    796   virtual bool runOnModule(llvm::Module &Module) {
    797     bool Changed  = false;
    798     this->Module  = &Module;
    799     this->Context = &Module.getContext();
    800 
    801     this->buildTypes();
    802 
    803     bcinfo::MetadataExtractor me(&Module);
    804     if (!me.extract()) {
    805       ALOGE("Could not extract metadata from module!");
    806       return false;
    807     }
    808     mExportForEachCount = me.getExportForEachSignatureCount();
    809     mExportForEachNameList = me.getExportForEachNameList();
    810     mExportForEachSignatureList = me.getExportForEachSignatureList();
    811 
    812     bool AllocsExposed = allocPointersExposed(Module);
    813 
    814     for (size_t i = 0; i < mExportForEachCount; ++i) {
    815       const char *name = mExportForEachNameList[i];
    816       uint32_t signature = mExportForEachSignatureList[i];
    817       llvm::Function *kernel = Module.getFunction(name);
    818       if (kernel) {
    819         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
    820           Changed |= ExpandKernel(kernel, signature);
    821           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
    822         } else if (kernel->getReturnType()->isVoidTy()) {
    823           Changed |= ExpandFunction(kernel, signature);
    824           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
    825         } else {
    826           // There are some graphics root functions that are not
    827           // expanded, but that will be called directly. For those
    828           // functions, we can not set the linkage to internal.
    829         }
    830       }
    831     }
    832 
    833     if (gEnableRsTbaa && !AllocsExposed) {
    834       connectRenderScriptTBAAMetadata(Module);
    835     }
    836 
    837     return Changed;
    838   }
    839 
    840   virtual const char *getPassName() const {
    841     return "ForEach-able Function Expansion";
    842   }
    843 
    844 }; // end RSForEachExpandPass
    845 
    846 } // end anonymous namespace
    847 
    848 char RSForEachExpandPass::ID = 0;
    849 
    850 namespace bcc {
    851 
    852 llvm::ModulePass *
    853 createRSForEachExpandPass(bool pEnableStepOpt){
    854   return new RSForEachExpandPass(pEnableStepOpt);
    855 }
    856 
    857 } // end namespace bcc
    858