1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuIntrinsic.h" 18 #include "rsCpuIntrinsicInlines.h" 19 20 using namespace android; 21 using namespace android::renderscript; 22 23 namespace android { 24 namespace renderscript { 25 26 27 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic { 28 public: 29 void populateScript(Script *) override; 30 void invokeFreeChildren() override; 31 32 void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override; 33 void setGlobalObj(uint32_t slot, ObjectBase *data) override; 34 35 ~RsdCpuScriptIntrinsicHistogram() override; 36 RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 37 38 protected: 39 void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, 40 Allocation * aout, const void * usr, 41 uint32_t usrLen, const RsScriptCall *sc); 42 void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen, 43 Allocation * aout, const void * usr, 44 uint32_t usrLen, const RsScriptCall *sc); 45 46 47 float mDot[4]; 48 int mDotI[4]; 49 int *mSums; 50 ObjectBaseRef<Allocation> mAllocOut; 51 52 static void kernelP1U4(const RsExpandKernelDriverInfo *info, 53 uint32_t xstart, uint32_t xend, 54 uint32_t outstep); 55 static void kernelP1U3(const RsExpandKernelDriverInfo *info, 56 uint32_t xstart, uint32_t xend, 57 uint32_t outstep); 58 static void kernelP1U2(const RsExpandKernelDriverInfo *info, 59 uint32_t xstart, uint32_t xend, 60 uint32_t outstep); 61 static void kernelP1U1(const RsExpandKernelDriverInfo *info, 62 uint32_t xstart, uint32_t xend, 63 uint32_t outstep); 64 65 static void kernelP1L4(const RsExpandKernelDriverInfo *info, 66 uint32_t xstart, uint32_t xend, 67 uint32_t outstep); 68 static void kernelP1L3(const RsExpandKernelDriverInfo *info, 69 uint32_t xstart, uint32_t xend, 70 uint32_t outstep); 71 static void kernelP1L2(const RsExpandKernelDriverInfo *info, 72 uint32_t xstart, uint32_t xend, 73 uint32_t outstep); 74 static void kernelP1L1(const RsExpandKernelDriverInfo *info, 75 uint32_t xstart, uint32_t xend, 76 uint32_t outstep); 77 78 }; 79 80 } 81 } 82 83 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) { 84 rsAssert(slot == 1); 85 mAllocOut.set(static_cast<Allocation *>(data)); 86 } 87 88 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 89 rsAssert(slot == 0); 90 rsAssert(dataLength == 16); 91 memcpy(mDot, data, 16); 92 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 93 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 94 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 95 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 96 } 97 98 99 100 void 101 RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, 102 const Allocation ** ains, 103 uint32_t inLen, Allocation * aout, 104 const void * usr, uint32_t usrLen, 105 const RsScriptCall *sc) { 106 107 const uint32_t threads = mCtx->getThreadCount(); 108 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 109 110 switch (slot) { 111 case 0: 112 switch(vSize) { 113 case 1: 114 mRootPtr = &kernelP1U1; 115 break; 116 case 2: 117 mRootPtr = &kernelP1U2; 118 break; 119 case 3: 120 mRootPtr = &kernelP1U3; 121 vSize = 4; 122 break; 123 case 4: 124 mRootPtr = &kernelP1U4; 125 break; 126 } 127 break; 128 case 1: 129 switch(ains[0]->getType()->getElement()->getVectorSize()) { 130 case 1: 131 mRootPtr = &kernelP1L1; 132 break; 133 case 2: 134 mRootPtr = &kernelP1L2; 135 break; 136 case 3: 137 mRootPtr = &kernelP1L3; 138 break; 139 case 4: 140 mRootPtr = &kernelP1L4; 141 break; 142 } 143 break; 144 } 145 memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize); 146 } 147 148 void 149 RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, 150 const Allocation ** ains, 151 uint32_t inLen, Allocation * aout, 152 const void * usr, uint32_t usrLen, 153 const RsScriptCall *sc) { 154 155 unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr; 156 uint32_t threads = mCtx->getThreadCount(); 157 uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize(); 158 159 if (vSize == 3) vSize = 4; 160 161 for (uint32_t ct=0; ct < (256 * vSize); ct++) { 162 o[ct] = mSums[ct]; 163 for (uint32_t t=1; t < threads; t++) { 164 o[ct] += mSums[ct + (256 * vSize * t)]; 165 } 166 } 167 } 168 169 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info, 170 uint32_t xstart, uint32_t xend, 171 uint32_t outstep) { 172 173 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 174 uchar *in = (uchar *)info->inPtr[0]; 175 int * sums = &cp->mSums[256 * 4 * info->lid]; 176 177 for (uint32_t x = xstart; x < xend; x++) { 178 sums[(in[0] << 2) ] ++; 179 sums[(in[1] << 2) + 1] ++; 180 sums[(in[2] << 2) + 2] ++; 181 sums[(in[3] << 2) + 3] ++; 182 in += info->inStride[0]; 183 } 184 } 185 186 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info, 187 uint32_t xstart, uint32_t xend, 188 uint32_t outstep) { 189 190 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 191 uchar *in = (uchar *)info->inPtr[0]; 192 int * sums = &cp->mSums[256 * 4 * info->lid]; 193 194 for (uint32_t x = xstart; x < xend; x++) { 195 sums[(in[0] << 2) ] ++; 196 sums[(in[1] << 2) + 1] ++; 197 sums[(in[2] << 2) + 2] ++; 198 in += info->inStride[0]; 199 } 200 } 201 202 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info, 203 uint32_t xstart, uint32_t xend, 204 uint32_t outstep) { 205 206 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 207 uchar *in = (uchar *)info->inPtr[0]; 208 int * sums = &cp->mSums[256 * 2 * info->lid]; 209 210 for (uint32_t x = xstart; x < xend; x++) { 211 sums[(in[0] << 1) ] ++; 212 sums[(in[1] << 1) + 1] ++; 213 in += info->inStride[0]; 214 } 215 } 216 217 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info, 218 uint32_t xstart, uint32_t xend, 219 uint32_t outstep) { 220 221 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 222 uchar *in = (uchar *)info->inPtr[0]; 223 int * sums = &cp->mSums[256 * info->lid]; 224 225 for (uint32_t x = xstart; x < xend; x++) { 226 int t = (cp->mDotI[0] * in[0]) + 227 (cp->mDotI[1] * in[1]) + 228 (cp->mDotI[2] * in[2]) + 229 (cp->mDotI[3] * in[3]); 230 sums[(t + 0x7f) >> 8] ++; 231 in += info->inStride[0]; 232 } 233 } 234 235 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info, 236 uint32_t xstart, uint32_t xend, 237 uint32_t outstep) { 238 239 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 240 uchar *in = (uchar *)info->inPtr[0]; 241 int * sums = &cp->mSums[256 * info->lid]; 242 243 for (uint32_t x = xstart; x < xend; x++) { 244 int t = (cp->mDotI[0] * in[0]) + 245 (cp->mDotI[1] * in[1]) + 246 (cp->mDotI[2] * in[2]); 247 sums[(t + 0x7f) >> 8] ++; 248 in += info->inStride[0]; 249 } 250 } 251 252 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info, 253 uint32_t xstart, uint32_t xend, 254 uint32_t outstep) { 255 256 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 257 uchar *in = (uchar *)info->inPtr[0]; 258 int * sums = &cp->mSums[256 * info->lid]; 259 260 for (uint32_t x = xstart; x < xend; x++) { 261 int t = (cp->mDotI[0] * in[0]) + 262 (cp->mDotI[1] * in[1]); 263 sums[(t + 0x7f) >> 8] ++; 264 in += info->inStride[0]; 265 } 266 } 267 268 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info, 269 uint32_t xstart, uint32_t xend, 270 uint32_t outstep) { 271 272 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 273 uchar *in = (uchar *)info->inPtr[0]; 274 int * sums = &cp->mSums[256 * info->lid]; 275 276 for (uint32_t x = xstart; x < xend; x++) { 277 int t = (cp->mDotI[0] * in[0]); 278 sums[(t + 0x7f) >> 8] ++; 279 in += info->inStride[0]; 280 } 281 } 282 283 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info, 284 uint32_t xstart, uint32_t xend, 285 uint32_t outstep) { 286 287 RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr; 288 uchar *in = (uchar *)info->inPtr[0]; 289 int * sums = &cp->mSums[256 * info->lid]; 290 291 for (uint32_t x = xstart; x < xend; x++) { 292 sums[in[0]] ++; 293 in += info->inStride[0]; 294 } 295 } 296 297 298 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, 299 const Script *s, const Element *e) 300 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) { 301 302 mRootPtr = nullptr; 303 mSums = new int[256 * 4 * mCtx->getThreadCount()]; 304 mDot[0] = 0.299f; 305 mDot[1] = 0.587f; 306 mDot[2] = 0.114f; 307 mDot[3] = 0; 308 mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f); 309 mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f); 310 mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f); 311 mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f); 312 } 313 314 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() { 315 if (mSums) { 316 delete []mSums; 317 } 318 } 319 320 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) { 321 s->mHal.info.exportedVariableCount = 2; 322 } 323 324 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() { 325 } 326 327 328 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 329 330 return new RsdCpuScriptIntrinsicHistogram(ctx, s, e); 331 } 332