Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef RSD_CPU_CORE_H
     18 #define RSD_CPU_CORE_H
     19 
     20 #include "rsd_cpu.h"
     21 #include "rsSignal.h"
     22 #include "rsContext.h"
     23 #include "rsCppUtils.h"
     24 #include "rsElement.h"
     25 #include "rsScriptC.h"
     26 #include "rsCpuCoreRuntime.h"
     27 
     28 namespace android {
     29 namespace renderscript {
     30 
// Whether the CPU we're running on supports SIMD instructions.
// Declared here only; the definition lives in another translation unit.
extern bool gArchUseSIMD;
     33 
     34 // Function types found in RenderScript code
     35 typedef void (*ReduceAccumulatorFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
     36 typedef void (*ReduceCombinerFunc_t)(uint8_t *accum, const uint8_t *other);
     37 typedef void (*ReduceInitializerFunc_t)(uint8_t *accum);
     38 typedef void (*ReduceOutConverterFunc_t)(uint8_t *out, const uint8_t *accum);
     39 typedef void (*ForEachFunc_t)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
     40 typedef void (*InvokeFunc_t)(void *params);
     41 typedef void (*InitOrDtorFunc_t)(void);
     42 typedef int  (*RootFunc_t)(void);
     43 
     44 struct ReduceDescription {
     45     ReduceAccumulatorFunc_t  accumFunc;  // expanded accumulator function
     46     ReduceInitializerFunc_t  initFunc;   // user initializer function
     47     ReduceCombinerFunc_t     combFunc;   // user combiner function
     48     ReduceOutConverterFunc_t outFunc;    // user outconverter function
     49     size_t                   accumSize;  // accumulator datum size, in bytes
     50 };
     51 
     52 // Internal driver callback used to execute a kernel
     53 typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
     54 
     55 class RsdCpuScriptImpl;
     56 class RsdCpuReferenceImpl;
     57 
     58 struct ScriptTLSStruct {
     59     android::renderscript::Context * mContext;
     60     const android::renderscript::Script * mScript;
     61     RsdCpuScriptImpl *mImpl;
     62 };
     63 
     64 // MTLaunchStruct passes information about a multithreaded kernel launch.
     65 struct MTLaunchStructCommon {
     66     RsdCpuReferenceImpl *rs;
     67     RsdCpuScriptImpl *script;
     68 
     69     uint32_t mSliceSize;
     70     volatile int mSliceNum;
     71     bool isThreadable;
     72 
     73     // Boundary information about the launch
     74     RsLaunchDimensions start;
     75     RsLaunchDimensions end;
     76     // Points to MTLaunchStructForEach::fep::dim or
     77     // MTLaunchStructReduce::redp::dim.
     78     RsLaunchDimensions *dimPtr;
     79 };
     80 
     81 struct MTLaunchStructForEach : public MTLaunchStructCommon {
     82     // Driver info structure
     83     RsExpandKernelDriverInfo fep;
     84 
     85     ForEachFunc_t kernel;
     86     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
     87     Allocation *aout[RS_KERNEL_INPUT_LIMIT];
     88 };
     89 
     90 struct MTLaunchStructReduce : public MTLaunchStructCommon {
     91     // Driver info structure
     92     RsExpandKernelDriverInfo redp;
     93 
     94     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];
     95 
     96     ReduceAccumulatorFunc_t accumFunc;
     97     ReduceInitializerFunc_t initFunc;
     98     ReduceCombinerFunc_t combFunc;
     99     ReduceOutConverterFunc_t outFunc;
    100 
    101     size_t accumSize;  // accumulator datum size in bytes
    102 
    103     size_t accumStride;  // stride between accumulators in accumAlloc (below)
    104 
    105     // These fields are used for managing accumulator data items in a
    106     // multithreaded execution.
    107     //
    108     // Let the number of threads be N.
    109     // Let Outc be true iff there is an outconverter.
    110     //
    111     // accumAlloc is a pointer to a single allocation of (N - !Outc)
    112     // accumulators.  (If there is no outconverter, then the output
    113     // allocation acts as an accumulator.)  It is created at kernel
    114     // launch time.  Within that allocation, the distance between the
    115     // start of adjacent accumulators is accumStride bytes -- this
    116     // might be the same as accumSize, or it might be larger, if we
    117     // are attempting to avoid false sharing.
    118     //
    119     // accumCount is an atomic counter of how many accumulators have
    120     // been grabbed by threads.  It is initialized to zero at kernel
    121     // launch time.  See accumPtr for further description.
    122     //
    123     // accumPtr is pointer to an array of N pointers to accumulators.
    124     // The array is created at kernel launch time, and each element is
    125     // initialized to nullptr.  When a particular thread goes to work,
    126     // that thread obtains its accumulator from its entry in this
    127     // array.  If the entry is nullptr, that thread needs to obtain an
    128     // accumulator, and initialize its entry in the array accordingly.
    129     // It does so via atomic access (fetch-and-add) to accumCount.
    130     // - If Outc, then the fetched value is used as an index into
    131     //   accumAlloc.
    132     // - If !Outc, then
    133     //   - If the fetched value is zero, then this thread gets the
    134     //     output allocation for its accumulator.
    135     //   - If the fetched value is nonzero, then (fetched value - 1)
    136     //     is used as an index into accumAlloc.
    137     uint8_t *accumAlloc;
    138     uint8_t **accumPtr;
    139     uint32_t accumCount;
    140 
    141     // Logging control
    142     uint32_t logReduce;
    143 };
    144 
    145 class RsdCpuReferenceImpl : public RsdCpuReference {
    146 public:
    147     ~RsdCpuReferenceImpl() override;
    148     RsdCpuReferenceImpl(Context *);
    149 
    150     void lockMutex();
    151     void unlockMutex();
    152 
    153     bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
    154     void setPriority(int32_t priority) override;
    155     virtual void launchThreads(WorkerCallback_t cbk, void *data);
    156     static void * helperThreadProc(void *vrsc);
    157     RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
    158 
    159     Context * getContext() {return mRSC;}
    160     uint32_t getThreadCount() const {
    161         return mWorkers.mCount + 1;
    162     }
    163 
    164     // Launch foreach kernel
    165     void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
    166                        const RsScriptCall *sc, MTLaunchStructForEach *mtls);
    167 
    168     // Launch a general reduce kernel
    169     void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    170                       MTLaunchStructReduce *mtls);
    171 
    172     CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
    173                              uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
    174     CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
    175     void* createScriptGroup(const ScriptGroupBase *sg) override;
    176 
    177     const RsdCpuReference::CpuSymbol *symLookup(const char *);
    178 
    179     RsdCpuReference::CpuScript *lookupScript(const Script *s) {
    180         return mScriptLookupFn(mRSC, s);
    181     }
    182 
    183     void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
    184         mSelectRTCallback = pSelectRTCallback;
    185     }
    186     RSSelectRTCallback getSelectRTCallback() {
    187         return mSelectRTCallback;
    188     }
    189 
    190     virtual void setBccPluginName(const char *name) {
    191         mBccPluginName.setTo(name);
    192     }
    193     virtual const char *getBccPluginName() const {
    194         return mBccPluginName.string();
    195     }
    196     bool getInKernel() override { return mInKernel; }
    197 
    198     // Set to true if we should embed global variable information in the code.
    199     void setEmbedGlobalInfo(bool v) override {
    200         mEmbedGlobalInfo = v;
    201     }
    202 
    203     // Returns true if we should embed global variable information in the code.
    204     bool getEmbedGlobalInfo() const override {
    205         return mEmbedGlobalInfo;
    206     }
    207 
    208     // Set to true if we should skip constant (immutable) global variables when
    209     // potentially embedding information about globals.
    210     void setEmbedGlobalInfoSkipConstant(bool v) override {
    211         mEmbedGlobalInfoSkipConstant = v;
    212     }
    213 
    214     // Returns true if we should skip constant (immutable) global variables when
    215     // potentially embedding information about globals.
    216     bool getEmbedGlobalInfoSkipConstant() const override {
    217         return mEmbedGlobalInfoSkipConstant;
    218     }
    219 
    220 protected:
    221     Context *mRSC;
    222     uint32_t version_major;
    223     uint32_t version_minor;
    224     //bool mHasGraphics;
    225     bool mInKernel;  // Is a parallel kernel execution underway?
    226 
    227     struct Workers {
    228         volatile int mRunningCount;
    229         volatile int mLaunchCount;
    230         uint32_t mCount;
    231         pthread_t *mThreadId;
    232         pid_t *mNativeThreadId;
    233         Signal mCompleteSignal;
    234         Signal *mLaunchSignals;
    235         WorkerCallback_t mLaunchCallback;
    236         void *mLaunchData;
    237     };
    238     Workers mWorkers;
    239     bool mExit;
    240     sym_lookup_t mSymLookupFn;
    241     script_lookup_t mScriptLookupFn;
    242 
    243     ScriptTLSStruct mTlsStruct;
    244 
    245     RSSelectRTCallback mSelectRTCallback;
    246     String8 mBccPluginName;
    247 
    248     // Specifies whether we should embed global variable information in the
    249     // code via special RS variables that can be examined later by the driver.
    250     // Defaults to true.
    251     bool mEmbedGlobalInfo;
    252 
    253     // Specifies whether we should skip constant (immutable) global variables
    254     // when potentially embedding information about globals.
    255     // Defaults to true.
    256     bool mEmbedGlobalInfoSkipConstant;
    257 
    258     long mPageSize;
    259 
    260     // Launch a general reduce kernel
    261     void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    262                             MTLaunchStructReduce *mtls);
    263     void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    264                               MTLaunchStructReduce *mtls);
    265 };
    266 
    267 
    268 }
    269 }
    270 
    271 #endif
    272