1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuIntrinsic.h" 18 #include "rsCpuIntrinsicInlines.h" 19 20 using namespace android; 21 using namespace android::renderscript; 22 23 namespace android { 24 namespace renderscript { 25 26 27 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 28 public: 29 virtual void populateScript(Script *); 30 virtual void invokeFreeChildren(); 31 32 virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength); 33 virtual void setGlobalObj(uint32_t slot, ObjectBase *data); 34 35 virtual ~RsdCpuScriptIntrinsicHistogram(); 36 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37 38 protected: 39 void preLaunch(uint32_t slot, const Allocation * ain, 40 Allocation * aout, const void * usr, 41 uint32_t usrLen, const RsScriptCall *sc); 42 void postLaunch(uint32_t slot, const Allocation * ain, 43 Allocation * aout, const void * usr, 44 uint32_t usrLen, const RsScriptCall *sc); 45 46 47 float mDot[4]; 48 int mDotI[4]; 49 int *mSums; 50 ObjectBaseRef<Allocation> mAllocOut; 51 52 static void kernelP1U4(const RsForEachStubParamStruct *p, 53 uint32_t xstart, uint32_t xend, 54 uint32_t instep, uint32_t outstep); 55 static void kernelP1U3(const RsForEachStubParamStruct *p, 56 uint32_t xstart, uint32_t xend, 57 uint32_t instep, uint32_t outstep); 58 static void kernelP1U2(const RsForEachStubParamStruct *p, 59 uint32_t xstart, uint32_t xend, 60 uint32_t instep, uint32_t outstep); 61 static void kernelP1U1(const RsForEachStubParamStruct *p, 62 uint32_t xstart, uint32_t xend, 63 uint32_t instep, uint32_t outstep); 64 65 static void kernelP1L4(const RsForEachStubParamStruct *p, 66 uint32_t xstart, uint32_t xend, 67 uint32_t instep, uint32_t outstep); 68 static void kernelP1L3(const RsForEachStubParamStruct *p, 69 uint32_t xstart, uint32_t xend, 70 uint32_t instep, uint32_t outstep); 71 static void kernelP1L2(const RsForEachStubParamStruct *p, 72 uint32_t xstart, uint32_t xend, 73 uint32_t instep, uint32_t outstep); 74 static void kernelP1L1(const RsForEachStubParamStruct *p, 75 uint32_t xstart, uint32_t xend, 76 uint32_t instep, uint32_t outstep); 77 78 }; 79 80 } 81 } 82 83 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 84 rsAssert(slot == 1); 85 mAllocOut.set(static_cast<Allocation *>(data)); 86 } 87 88 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 89 rsAssert(slot == 0); 90 rsAssert(dataLength == 16); 91 memcpy(mDot, data, 16); 92 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 93 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 94 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 95 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 96 } 97 98 99 100 void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain, 101 Allocation * aout, const void * usr, 102 uint32_t usrLen, const RsScriptCall *sc) { 103 104 const uint32_t threads = mCtx->getThreadCount(); 105 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 106 107 switch (slot) { 108 case 0: 109 switch(vSize) { 110 case 1: 111 mRootPtr = &kernelP1U1; 112 break; 113 case 2: 114 mRootPtr = &kernelP1U2; 115 break; 116 case 3: 117 mRootPtr = &kernelP1U3; 118 vSize = 4; 119 break; 120 case 4: 121 mRootPtr = &kernelP1U4; 122 break; 123 } 124 break; 125 case 1: 126 switch(ain->getType()->getElement()->getVectorSize()) { 127 case 1: 128 mRootPtr = &kernelP1L1; 129 break; 130 case 2: 131 mRootPtr = &kernelP1L2; 132 break; 133 case 3: 134 mRootPtr = &kernelP1L3; 135 break; 136 case 4: 137 mRootPtr = &kernelP1L4; 138 break; 139 } 140 break; 141 } 142 memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); 143 } 144 145 void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain, 146 Allocation * aout, const void * usr, 147 uint32_t usrLen, const RsScriptCall *sc) { 148 149 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 150 uint32_t threads = mCtx->getThreadCount(); 151 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 152 153 if (vSize == 3) vSize = 4; 154 155 for (uint32_t ct=0; ct < (256 * vSize); ct++) { 156 o[ct] = mSums[ct]; 157 for (uint32_t t=1; t < threads; t++) { 158 o[ct] += mSums[ct + (256 * vSize * t)]; 159 } 160 } 161 } 162 163 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p, 164 uint32_t xstart, uint32_t xend, 165 uint32_t instep, uint32_t outstep) { 166 167 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 168 uchar *in = (uchar *)p->in; 169 int * sums = &cp->mSums[256 * 4 * p->lid]; 170 171 for (uint32_t x = xstart; x < xend; x++) { 172 sums[(in[0] << 2) ] ++; 173 sums[(in[1] << 2) + 1] ++; 174 sums[(in[2] << 2) + 2] ++; 175 sums[(in[3] << 2) + 3] ++; 176 in += instep; 177 } 178 } 179 180 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p, 181 uint32_t xstart, uint32_t xend, 182 uint32_t instep, uint32_t outstep) { 183 184 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 185 uchar *in = (uchar *)p->in; 186 int * sums = &cp->mSums[256 * 4 * p->lid]; 187 188 for (uint32_t x = xstart; x < xend; x++) { 189 sums[(in[0] << 2) ] ++; 190 sums[(in[1] << 2) + 1] ++; 191 sums[(in[2] << 2) + 2] ++; 192 in += instep; 193 } 194 } 195 196 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p, 197 uint32_t xstart, uint32_t xend, 198 uint32_t instep, uint32_t outstep) { 199 200 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 201 uchar *in = (uchar *)p->in; 202 int * sums = &cp->mSums[256 * 2 * p->lid]; 203 204 for (uint32_t x = xstart; x < xend; x++) { 205 sums[(in[0] << 1) ] ++; 206 sums[(in[1] << 1) + 1] ++; 207 in += instep; 208 } 209 } 210 211 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p, 212 uint32_t xstart, uint32_t xend, 213 uint32_t instep, uint32_t outstep) { 214 215 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 216 uchar *in = (uchar *)p->in; 217 int * sums = &cp->mSums[256 * p->lid]; 218 219 for (uint32_t x = xstart; x < xend; x++) { 220 int t = (cp->mDotI[0] * in[0]) + 221 (cp->mDotI[1] * in[1]) + 222 (cp->mDotI[2] * in[2]) + 223 (cp->mDotI[3] * in[3]); 224 sums[(t + 0x7f) >> 8] ++; 225 in += instep; 226 } 227 } 228 229 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p, 230 uint32_t xstart, uint32_t xend, 231 uint32_t instep, uint32_t outstep) { 232 233 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 234 uchar *in = (uchar *)p->in; 235 int * sums = &cp->mSums[256 * p->lid]; 236 237 for (uint32_t x = xstart; x < xend; x++) { 238 int t = (cp->mDotI[0] * in[0]) + 239 (cp->mDotI[1] * in[1]) + 240 (cp->mDotI[2] * in[2]); 241 sums[(t + 0x7f) >> 8] ++; 242 in += instep; 243 } 244 } 245 246 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p, 247 uint32_t xstart, uint32_t xend, 248 uint32_t instep, uint32_t outstep) { 249 250 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 251 uchar *in = (uchar *)p->in; 252 int * sums = &cp->mSums[256 * p->lid]; 253 254 for (uint32_t x = xstart; x < xend; x++) { 255 int t = (cp->mDotI[0] * in[0]) + 256 (cp->mDotI[1] * in[1]); 257 sums[(t + 0x7f) >> 8] ++; 258 in += instep; 259 } 260 } 261 262 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p, 263 uint32_t xstart, uint32_t xend, 264 uint32_t instep, uint32_t outstep) { 265 266 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 267 uchar *in = (uchar *)p->in; 268 int * sums = &cp->mSums[256 * p->lid]; 269 270 for (uint32_t x = xstart; x < xend; x++) { 271 int t = (cp->mDotI[0] * in[0]); 272 sums[(t + 0x7f) >> 8] ++; 273 in += instep; 274 } 275 } 276 277 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p, 278 uint32_t xstart, uint32_t xend, 279 uint32_t instep, uint32_t outstep) { 280 281 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr; 282 uchar *in = (uchar *)p->in; 283 int * sums = &cp->mSums[256 * p->lid]; 284 285 for (uint32_t x = xstart; x < xend; x++) { 286 sums[in[0]] ++; 287 in += instep; 288 } 289 } 290 291 292 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 293 const Script *s, const Element *e) 294 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) { 295 296 mRootPtr = NULL; 297 mSums = new int[256 * 4 * mCtx->getThreadCount()]; 298 mDot[0] = 0.299f; 299 mDot[1] = 0.587f; 300 mDot[2] = 0.114f; 301 mDot[3] = 0; 302 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 303 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 304 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 305 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 306 } 307 308 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 309 if (mSums) { 310 delete []mSums; 311 } 312 } 313 314 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 315 s->mHal.info.exportedVariableCount = 2; 316 } 317 318 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 319 } 320 321 322 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 323 324 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 325 } 326 327 328