Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef RSD_CPU_CORE_H
     18 #define RSD_CPU_CORE_H
     19 
     20 #include "rsd_cpu.h"
     21 #include "rsSignal.h"
     22 #include "rsContext.h"
     23 #include "rsCppUtils.h"
     24 #include "rsElement.h"
     25 #include "rsScriptC.h"
     26 #include "rsCpuCoreRuntime.h"
     27 
     28 #include <string>
     29 
     30 namespace android {
     31 namespace renderscript {
     32 
     33 // Whether the CPU we're running on supports SIMD instructions.
     34 extern bool gArchUseSIMD;  // declaration only (extern); the definition is not in this header
     35 
     36 // Pointer-to-function aliases for the entry points found in RenderScript code
     37 using ReduceAccumulatorFunc_t = void (*)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint8_t *accum);
     38 using ReduceCombinerFunc_t = void (*)(uint8_t *accum, const uint8_t *other);
     39 using ReduceInitializerFunc_t = void (*)(uint8_t *accum);
     40 using ReduceOutConverterFunc_t = void (*)(uint8_t *out, const uint8_t *accum);
     41 using ForEachFunc_t = void (*)(const RsExpandKernelDriverInfo *info, uint32_t x1, uint32_t x2, uint32_t outStride);
     42 using InvokeFunc_t = void (*)(void *params);
     43 using InitOrDtorFunc_t = void (*)(void);
     44 using RootFunc_t = int (*)(void);
     45 
     46 struct ReduceDescription {
            // Groups the four reduce function pointers with the size of one
            // accumulator datum.
     47     ReduceAccumulatorFunc_t  accumFunc;  // expanded accumulator function
     48     ReduceInitializerFunc_t  initFunc;   // user initializer function; called as initFunc(accum)
     49     ReduceCombinerFunc_t     combFunc;   // user combiner function; called as combFunc(accum, other)
     50     ReduceOutConverterFunc_t outFunc;    // user outconverter function; called as outFunc(out, accum)
     51     size_t                   accumSize;  // accumulator datum size, in bytes
     52 };
     53 
     54 // Callback type the driver uses internally to execute a kernel
     55 using WorkerCallback_t = void (*)(void *usr, uint32_t idx);
     56 
     57 class RsdCpuScriptImpl;     // forward declaration; no definition appears in this header
     58 class RsdCpuReferenceImpl;  // forward declaration; the class is defined further down in this header
     59 
     60 struct ScriptTLSStruct {
            // A context / script / script-implementation triple.  Presumably
            // the per-thread state managed through RsdCpuReferenceImpl::setTLS
            // -- confirm against the implementation.
            // Names are unqualified: this struct already lives in
            // android::renderscript.
     61     Context *mContext;
     62     const Script *mScript;
     63     RsdCpuScriptImpl *mImpl;
     64 };
     65 
     66 // MTLaunchStructCommon carries the information about a multithreaded kernel
        // launch that MTLaunchStructForEach and MTLaunchStructReduce have in common
        // (both derive from it).
     67 struct MTLaunchStructCommon {
     68     RsdCpuReferenceImpl *rs;       // CPU reference driver object for this launch (presumably not owned -- confirm)
     69     RsdCpuScriptImpl *script;      // script implementation being launched (presumably not owned -- confirm)
     70 
            // NOTE(review): presumably the work is handed out in slices of
            // mSliceSize and mSliceNum is the shared "next slice" counter.
            // volatile by itself does not make updates atomic -- confirm how
            // the implementation increments it.
     71     uint32_t mSliceSize;
     72     volatile int mSliceNum;
     73     bool isThreadable;             // presumably: may this launch be spread over several threads -- confirm
     74 
     75     // Boundary information about the launch
     76     RsLaunchDimensions start;
     77     RsLaunchDimensions end;
     78     // Points to MTLaunchStructForEach::fep::dim or
     79     // MTLaunchStructReduce::redp::dim.
     80     RsLaunchDimensions *dimPtr;
     81 };
     82 
     83 struct MTLaunchStructForEach : public MTLaunchStructCommon {
            // Launch state specific to a foreach kernel, in addition to the
            // fields inherited from MTLaunchStructCommon.
     84     // Driver info structure
     85     RsExpandKernelDriverInfo fep;
     86 
     87     ForEachFunc_t kernel;                           // kernel entry point; takes (info, x1, x2, outStride)
     88     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];  // input allocations, at most RS_KERNEL_INPUT_LIMIT of them
     89     Allocation *aout[RS_KERNEL_INPUT_LIMIT];        // output allocation slots; NOTE(review): sized by the *input* limit -- confirm this is intended
     90 };
     91 
     92 struct MTLaunchStructReduce : public MTLaunchStructCommon {
            // Launch state specific to a general reduce kernel, in addition to
            // the fields inherited from MTLaunchStructCommon.
     93     // Driver info structure
     94     RsExpandKernelDriverInfo redp;
     95 
     96     const Allocation *ains[RS_KERNEL_INPUT_LIMIT];  // input allocations, at most RS_KERNEL_INPUT_LIMIT of them
     97 
            // The same four function pointers that ReduceDescription holds.
     98     ReduceAccumulatorFunc_t accumFunc;
     99     ReduceInitializerFunc_t initFunc;
    100     ReduceCombinerFunc_t combFunc;
    101     ReduceOutConverterFunc_t outFunc;
    102 
    103     size_t accumSize;  // accumulator datum size in bytes
    104 
    105     size_t accumStride;  // stride between accumulators in accumAlloc (below)
    106 
    107     // These fields are used for managing accumulator data items in a
    108     // multithreaded execution.
    109     //
    110     // Let the number of threads be N.
    111     // Let Outc be true iff there is an outconverter.
    112     //
    113     // accumAlloc is a pointer to a single allocation of (N - !Outc)
    114     // accumulators.  (If there is no outconverter, then the output
    115     // allocation acts as an accumulator.)  It is created at kernel
    116     // launch time.  Within that allocation, the distance between the
    117     // start of adjacent accumulators is accumStride bytes -- this
    118     // might be the same as accumSize, or it might be larger, if we
    119     // are attempting to avoid false sharing.
    120     //
    121     // accumCount is an atomic counter of how many accumulators have
    122     // been grabbed by threads.  It is initialized to zero at kernel
    123     // launch time.  See accumPtr for further description.
    124     //
    125     // accumPtr is a pointer to an array of N pointers to accumulators.
    126     // The array is created at kernel launch time, and each element is
    127     // initialized to nullptr.  When a particular thread goes to work,
    128     // that thread obtains its accumulator from its entry in this
    129     // array.  If the entry is nullptr, that thread needs to obtain an
    130     // accumulator, and initialize its entry in the array accordingly.
    131     // It does so via atomic access (fetch-and-add) to accumCount.
    132     // - If Outc, then the fetched value is used as an index into
    133     //   accumAlloc.
    134     // - If !Outc, then
    135     //   - If the fetched value is zero, then this thread gets the
    136     //     output allocation for its accumulator.
    137     //   - If the fetched value is nonzero, then (fetched value - 1)
    138     //     is used as an index into accumAlloc.
    139     uint8_t *accumAlloc;
    140     uint8_t **accumPtr;
    141     uint32_t accumCount;  // NOTE(review): declared as a plain uint32_t; the atomicity described above has to come from how it is accessed -- confirm
    142 
    143     // Logging control
    144     uint32_t logReduce;
    145 };
    146 
    147 class RsdCpuReferenceImpl : public RsdCpuReference {
        // Concrete implementation of the RsdCpuReference interface.  It holds the
        // owning Context, the worker-thread bookkeeping (Workers) and a handful of
        // configuration flags.  Only trivial accessors are defined inline here;
        // every other member function is defined out of line.
    148 public:
    149     ~RsdCpuReferenceImpl() override;
    150     explicit RsdCpuReferenceImpl(Context *);
    151 
    152     void lockMutex();
    153     void unlockMutex();
    154 
            // Out-of-line initialisation.  Presumably records the version numbers
            // and the two lookup callbacks in the members below -- confirm.
    155     bool init(uint32_t version_major, uint32_t version_minor, sym_lookup_t, script_lookup_t);
    156     void setPriority(int32_t priority) override;
    157     virtual void launchThreads(WorkerCallback_t cbk, void *data);
    158     static void * helperThreadProc(void *vrsc);
    159     RsdCpuScriptImpl * setTLS(RsdCpuScriptImpl *sc);
    160 
            // Returns the Context stored in mRSC.
    161     Context * getContext() {return mRSC;}
            // Returns the worker count plus one (the extra one is presumably the
            // launching thread itself -- confirm).
    162     uint32_t getThreadCount() const {
    163         return mWorkers.mCount + 1;
    164     }
    165 
    166     // Launch foreach kernel
    167     void launchForEach(const Allocation **ains, uint32_t inLen, Allocation *aout,
    168                        const RsScriptCall *sc, MTLaunchStructForEach *mtls);
    169 
    170     // Launch a general reduce kernel
    171     void launchReduce(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    172                       MTLaunchStructReduce *mtls);
    173 
    174     CpuScript * createScript(const ScriptC *s, char const *resName, char const *cacheDir,
    175                              uint8_t const *bitcode, size_t bitcodeSize, uint32_t flags) override;
    176     CpuScript * createIntrinsic(const Script *s, RsScriptIntrinsicID iid, Element *e) override;
    177     void* createScriptGroup(const ScriptGroupBase *sg) override;
    178 
    179     const RsdCpuReference::CpuSymbol *symLookup(const char *);
    180 
            // Forwards to the script-lookup callback, passing the stored Context.
    181     RsdCpuReference::CpuScript *lookupScript(const Script *s) {
    182         return mScriptLookupFn(mRSC, s);
    183     }
    184 
    185     void setSelectRTCallback(RSSelectRTCallback pSelectRTCallback) {
    186         mSelectRTCallback = pSelectRTCallback;
    187     }
    188     RSSelectRTCallback getSelectRTCallback() {
    189         return mSelectRTCallback;
    190     }
    191 
            // Stores a copy of the plugin name.  A null pointer is stored as the
            // empty string: std::string::assign(const char *) requires a valid
            // NUL-terminated string, so passing nullptr through would be undefined
            // behaviour.
    192     virtual void setBccPluginName(const char *name) {
    193         mBccPluginName.assign(name != nullptr ? name : "");
    194     }
            // Returns the stored plugin name (empty string if none was set).  The
            // pointer refers to mBccPluginName's buffer and is invalidated when
            // the name is changed.
    195     virtual const char *getBccPluginName() const {
    196         return mBccPluginName.c_str();
    197     }
    198     bool getInKernel() override { return mInKernel; }
    199 
    200     // Set to true if we should embed global variable information in the code.
    201     void setEmbedGlobalInfo(bool v) override {
    202         mEmbedGlobalInfo = v;
    203     }
    204 
    205     // Returns true if we should embed global variable information in the code.
    206     bool getEmbedGlobalInfo() const override {
    207         return mEmbedGlobalInfo;
    208     }
    209 
    210     // Set to true if we should skip constant (immutable) global variables when
    211     // potentially embedding information about globals.
    212     void setEmbedGlobalInfoSkipConstant(bool v) override {
    213         mEmbedGlobalInfoSkipConstant = v;
    214     }
    215 
    216     // Returns true if we should skip constant (immutable) global variables when
    217     // potentially embedding information about globals.
    218     bool getEmbedGlobalInfoSkipConstant() const override {
    219         return mEmbedGlobalInfoSkipConstant;
    220     }
    221 
    222 protected:
    223     Context *mRSC;
    224     uint32_t version_major;
    225     uint32_t version_minor;
    226     //bool mHasGraphics;
    227     bool mInKernel;  // Is a parallel kernel execution underway?
    228 
            // Bookkeeping for the worker threads.
            // NOTE(review): the two counters are volatile ints; volatile alone
            // does not make read-modify-write updates atomic -- confirm how the
            // implementation modifies them.
    229     struct Workers {
    230         volatile int mRunningCount;
    231         volatile int mLaunchCount;
    232         uint32_t mCount;               // number of workers; getThreadCount() reports mCount + 1
    233         pthread_t *mThreadId;
    234         pid_t *mNativeThreadId;
    235         Signal mCompleteSignal;
    236         Signal *mLaunchSignals;
    237         WorkerCallback_t mLaunchCallback;
    238         void *mLaunchData;
    239     };
    240     Workers mWorkers;
    241     bool mExit;
    242     sym_lookup_t mSymLookupFn;
    243     script_lookup_t mScriptLookupFn;  // invoked by lookupScript()
    244 
    245     ScriptTLSStruct mTlsStruct;
    246 
    247     RSSelectRTCallback mSelectRTCallback;
    248     std::string mBccPluginName;       // never holds a null; empty means no name set
    249 
    250     // Specifies whether we should embed global variable information in the
    251     // code via special RS variables that can be examined later by the driver.
    252     // Defaults to true.
    253     bool mEmbedGlobalInfo;
    254 
    255     // Specifies whether we should skip constant (immutable) global variables
    256     // when potentially embedding information about globals.
    257     // Defaults to true.
    258     bool mEmbedGlobalInfoSkipConstant;
    259 
    260     long mPageSize;
    261 
    262     // Serial and parallel variants of launching a general reduce kernel
    263     void launchReduceSerial(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    264                             MTLaunchStructReduce *mtls);
    265     void launchReduceParallel(const Allocation ** ains, uint32_t inLen, Allocation *aout,
    266                               MTLaunchStructReduce *mtls);
    267 };
    268 
    269 
    270 } // namespace renderscript
    271 } // namespace android
    272 
    273 #endif
    274