1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "rsCpuIntrinsic.h" 19 #include "rsCpuIntrinsicInlines.h" 20 21 using namespace android; 22 using namespace android::renderscript; 23 24 namespace android { 25 namespace renderscript { 26 27 28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic { 29 public: 30 virtual void populateScript(Script *); 31 32 virtual ~RsdCpuScriptIntrinsicBlend(); 33 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 34 35 protected: 36 static void kernel(const RsForEachStubParamStruct *p, 37 uint32_t xstart, uint32_t xend, 38 uint32_t instep, uint32_t outstep); 39 }; 40 41 } 42 } 43 44 45 enum { 46 BLEND_CLEAR = 0, 47 BLEND_SRC = 1, 48 BLEND_DST = 2, 49 BLEND_SRC_OVER = 3, 50 BLEND_DST_OVER = 4, 51 BLEND_SRC_IN = 5, 52 BLEND_DST_IN = 6, 53 BLEND_SRC_OUT = 7, 54 BLEND_DST_OUT = 8, 55 BLEND_SRC_ATOP = 9, 56 BLEND_DST_ATOP = 10, 57 BLEND_XOR = 11, 58 59 BLEND_NORMAL = 12, 60 BLEND_AVERAGE = 13, 61 BLEND_MULTIPLY = 14, 62 BLEND_SCREEN = 15, 63 BLEND_DARKEN = 16, 64 BLEND_LIGHTEN = 17, 65 BLEND_OVERLAY = 18, 66 BLEND_HARDLIGHT = 19, 67 BLEND_SOFTLIGHT = 20, 68 BLEND_DIFFERENCE = 21, 69 BLEND_NEGATION = 22, 70 BLEND_EXCLUSION = 23, 71 BLEND_COLOR_DODGE = 24, 72 BLEND_INVERSE_COLOR_DODGE = 25, 73 BLEND_SOFT_DODGE = 26, 74 BLEND_COLOR_BURN = 27, 75 BLEND_INVERSE_COLOR_BURN = 28, 76 BLEND_SOFT_BURN = 29, 77 BLEND_REFLECT = 30, 78 BLEND_GLOW = 31, 79 BLEND_FREEZE = 32, 80 BLEND_HEAT = 33, 81 BLEND_ADD = 34, 82 BLEND_SUBTRACT = 35, 83 BLEND_STAMP = 36, 84 BLEND_RED = 37, 85 BLEND_GREEN = 38, 86 BLEND_BLUE = 39, 87 BLEND_HUE = 40, 88 BLEND_SATURATION = 41, 89 BLEND_COLOR = 42, 90 BLEND_LUMINOSITY = 43 91 }; 92 93 #if defined(ARCH_ARM_USE_INTRINSICS) 94 extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot, 95 uint32_t xstart, uint32_t xend); 96 #endif 97 98 #if defined(ARCH_X86_HAVE_SSSE3) 99 extern "C" void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8); 100 extern "C" void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8); 101 extern "C" void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8); 102 extern "C" void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8); 103 extern "C" void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8); 104 extern "C" void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8); 105 extern "C" void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8); 106 extern "C" void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8); 107 extern "C" void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8); 108 extern "C" void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8); 109 extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8); 110 extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8); 111 #endif 112 113 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p, 114 uint32_t xstart, uint32_t xend, 115 uint32_t instep, uint32_t outstep) { 116 RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr; 117 118 // instep/outstep can be ignored--sizeof(uchar4) known at compile time 119 uchar4 *out = (uchar4 *)p->out; 120 uchar4 *in = (uchar4 *)p->in; 121 uint32_t x1 = xstart; 122 uint32_t x2 = xend; 123 124 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS) 125 if (gArchUseSIMD) { 126 if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0) 127 return; 128 } 129 #endif 130 switch (p->slot) { 131 case BLEND_CLEAR: 132 for (;x1 < x2; x1++, out++) { 133 *out = 0; 134 } 135 break; 136 case BLEND_SRC: 137 for (;x1 < x2; x1++, out++, in++) { 138 *out = *in; 139 } 140 break; 141 //BLEND_DST is a NOP 142 case BLEND_DST: 143 break; 144 case BLEND_SRC_OVER: 145 #if defined(ARCH_X86_HAVE_SSSE3) 146 if (gArchUseSIMD) { 147 if ((x1 + 8) < x2) { 148 uint32_t len = (x2 - x1) >> 3; 149 rsdIntrinsicBlendSrcOver_K(out, in, len); 150 x1 += len << 3; 151 out += len << 3; 152 in += len << 3; 153 } 154 } 155 #endif 156 for (;x1 < x2; x1++, out++, in++) { 157 short4 in_s = convert_short4(*in); 158 short4 out_s = convert_short4(*out); 159 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8); 160 *out = convert_uchar4(in_s); 161 } 162 break; 163 case BLEND_DST_OVER: 164 #if defined(ARCH_X86_HAVE_SSSE3) 165 if (gArchUseSIMD) { 166 if ((x1 + 8) < x2) { 167 uint32_t len = (x2 - x1) >> 3; 168 rsdIntrinsicBlendDstOver_K(out, in, len); 169 x1 += len << 3; 170 out += len << 3; 171 in += len << 3; 172 } 173 } 174 #endif 175 for (;x1 < x2; x1++, out++, in++) { 176 short4 in_s = convert_short4(*in); 177 short4 out_s = convert_short4(*out); 178 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8); 179 *out = convert_uchar4(in_s); 180 } 181 break; 182 case BLEND_SRC_IN: 183 #if defined(ARCH_X86_HAVE_SSSE3) 184 if (gArchUseSIMD) { 185 if ((x1 + 8) < x2) { 186 uint32_t len = (x2 - x1) >> 3; 187 rsdIntrinsicBlendSrcIn_K(out, in, len); 188 x1 += len << 3; 189 out += len << 3; 190 in += len << 3; 191 } 192 } 193 #endif 194 for (;x1 < x2; x1++, out++, in++) { 195 short4 in_s = convert_short4(*in); 196 in_s = (in_s * out->w) >> (short4)8; 197 *out = convert_uchar4(in_s); 198 } 199 break; 200 case BLEND_DST_IN: 201 #if defined(ARCH_X86_HAVE_SSSE3) 202 if (gArchUseSIMD) { 203 if ((x1 + 8) < x2) { 204 uint32_t len = (x2 - x1) >> 3; 205 rsdIntrinsicBlendDstIn_K(out, in, len); 206 x1 += len << 3; 207 out += len << 3; 208 in += len << 3; 209 } 210 } 211 #endif 212 for (;x1 < x2; x1++, out++, in++) { 213 short4 out_s = convert_short4(*out); 214 out_s = (out_s * in->w) >> (short4)8; 215 *out = convert_uchar4(out_s); 216 } 217 break; 218 case BLEND_SRC_OUT: 219 #if defined(ARCH_X86_HAVE_SSSE3) 220 if (gArchUseSIMD) { 221 if ((x1 + 8) < x2) { 222 uint32_t len = (x2 - x1) >> 3; 223 rsdIntrinsicBlendSrcOut_K(out, in, len); 224 x1 += len << 3; 225 out += len << 3; 226 in += len << 3; 227 } 228 } 229 #endif 230 for (;x1 < x2; x1++, out++, in++) { 231 short4 in_s = convert_short4(*in); 232 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8; 233 *out = convert_uchar4(in_s); 234 } 235 break; 236 case BLEND_DST_OUT: 237 #if defined(ARCH_X86_HAVE_SSSE3) 238 if (gArchUseSIMD) { 239 if ((x1 + 8) < x2) { 240 uint32_t len = (x2 - x1) >> 3; 241 rsdIntrinsicBlendDstOut_K(out, in, len); 242 x1 += len << 3; 243 out += len << 3; 244 in += len << 3; 245 } 246 } 247 #endif 248 for (;x1 < x2; x1++, out++, in++) { 249 short4 out_s = convert_short4(*out); 250 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8; 251 *out = convert_uchar4(out_s); 252 } 253 break; 254 case BLEND_SRC_ATOP: 255 #if defined(ARCH_X86_HAVE_SSSE3) 256 if (gArchUseSIMD) { 257 if ((x1 + 8) < x2) { 258 uint32_t len = (x2 - x1) >> 3; 259 rsdIntrinsicBlendSrcAtop_K(out, in, len); 260 x1 += len << 3; 261 out += len << 3; 262 in += len << 3; 263 } 264 } 265 #endif 266 for (;x1 < x2; x1++, out++, in++) { 267 short4 in_s = convert_short4(*in); 268 short4 out_s = convert_short4(*out); 269 out_s.xyz = ((in_s.xyz * out_s.w) + 270 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8; 271 *out = convert_uchar4(out_s); 272 } 273 break; 274 case BLEND_DST_ATOP: 275 #if defined(ARCH_X86_HAVE_SSSE3) 276 if (gArchUseSIMD) { 277 if ((x1 + 8) < x2) { 278 uint32_t len = (x2 - x1) >> 3; 279 rsdIntrinsicBlendDstAtop_K(out, in, len); 280 x1 += len << 3; 281 out += len << 3; 282 in += len << 3; 283 } 284 } 285 #endif 286 for (;x1 < x2; x1++, out++, in++) { 287 short4 in_s = convert_short4(*in); 288 short4 out_s = convert_short4(*out); 289 out_s.xyz = ((out_s.xyz * in_s.w) + 290 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8; 291 *out = convert_uchar4(out_s); 292 } 293 break; 294 case BLEND_XOR: 295 #if defined(ARCH_X86_HAVE_SSSE3) 296 if (gArchUseSIMD) { 297 if ((x1 + 8) < x2) { 298 uint32_t len = (x2 - x1) >> 3; 299 rsdIntrinsicBlendXor_K(out, in, len); 300 x1 += len << 3; 301 out += len << 3; 302 in += len << 3; 303 } 304 } 305 #endif 306 for (;x1 < x2; x1++, out++, in++) { 307 *out = *in ^ *out; 308 } 309 break; 310 case BLEND_NORMAL: 311 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL"); 312 rsAssert(false); 313 break; 314 case BLEND_AVERAGE: 315 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE"); 316 rsAssert(false); 317 break; 318 case BLEND_MULTIPLY: 319 #if defined(ARCH_X86_HAVE_SSSE3) 320 if (gArchUseSIMD) { 321 if ((x1 + 8) < x2) { 322 uint32_t len = (x2 - x1) >> 3; 323 rsdIntrinsicBlendMultiply_K(out, in, len); 324 x1 += len << 3; 325 out += len << 3; 326 in += len << 3; 327 } 328 } 329 #endif 330 for (;x1 < x2; x1++, out++, in++) { 331 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out)) 332 >> (short4)8); 333 } 334 break; 335 case BLEND_SCREEN: 336 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN"); 337 rsAssert(false); 338 break; 339 case BLEND_DARKEN: 340 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN"); 341 rsAssert(false); 342 break; 343 case BLEND_LIGHTEN: 344 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN"); 345 rsAssert(false); 346 break; 347 case BLEND_OVERLAY: 348 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY"); 349 rsAssert(false); 350 break; 351 case BLEND_HARDLIGHT: 352 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT"); 353 rsAssert(false); 354 break; 355 case BLEND_SOFTLIGHT: 356 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT"); 357 rsAssert(false); 358 break; 359 case BLEND_DIFFERENCE: 360 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE"); 361 rsAssert(false); 362 break; 363 case BLEND_NEGATION: 364 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION"); 365 rsAssert(false); 366 break; 367 case BLEND_EXCLUSION: 368 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION"); 369 rsAssert(false); 370 break; 371 case BLEND_COLOR_DODGE: 372 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE"); 373 rsAssert(false); 374 break; 375 case BLEND_INVERSE_COLOR_DODGE: 376 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE"); 377 rsAssert(false); 378 break; 379 case BLEND_SOFT_DODGE: 380 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE"); 381 rsAssert(false); 382 break; 383 case BLEND_COLOR_BURN: 384 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN"); 385 rsAssert(false); 386 break; 387 case BLEND_INVERSE_COLOR_BURN: 388 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN"); 389 rsAssert(false); 390 break; 391 case BLEND_SOFT_BURN: 392 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN"); 393 rsAssert(false); 394 break; 395 case BLEND_REFLECT: 396 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT"); 397 rsAssert(false); 398 break; 399 case BLEND_GLOW: 400 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW"); 401 rsAssert(false); 402 break; 403 case BLEND_FREEZE: 404 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE"); 405 rsAssert(false); 406 break; 407 case BLEND_HEAT: 408 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT"); 409 rsAssert(false); 410 break; 411 case BLEND_ADD: 412 #if defined(ARCH_X86_HAVE_SSSE3) 413 if (gArchUseSIMD) { 414 if((x1 + 8) < x2) { 415 uint32_t len = (x2 - x1) >> 3; 416 rsdIntrinsicBlendAdd_K(out, in, len); 417 x1 += len << 3; 418 out += len << 3; 419 in += len << 3; 420 } 421 } 422 #endif 423 for (;x1 < x2; x1++, out++, in++) { 424 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, 425 oR = out->x, oG = out->y, oB = out->z, oA = out->w; 426 out->x = (oR + iR) > 255 ? 255 : oR + iR; 427 out->y = (oG + iG) > 255 ? 255 : oG + iG; 428 out->z = (oB + iB) > 255 ? 255 : oB + iB; 429 out->w = (oA + iA) > 255 ? 255 : oA + iA; 430 } 431 break; 432 case BLEND_SUBTRACT: 433 #if defined(ARCH_X86_HAVE_SSSE3) 434 if (gArchUseSIMD) { 435 if((x1 + 8) < x2) { 436 uint32_t len = (x2 - x1) >> 3; 437 rsdIntrinsicBlendSub_K(out, in, len); 438 x1 += len << 3; 439 out += len << 3; 440 in += len << 3; 441 } 442 } 443 #endif 444 for (;x1 < x2; x1++, out++, in++) { 445 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, 446 oR = out->x, oG = out->y, oB = out->z, oA = out->w; 447 out->x = (oR - iR) < 0 ? 0 : oR - iR; 448 out->y = (oG - iG) < 0 ? 0 : oG - iG; 449 out->z = (oB - iB) < 0 ? 0 : oB - iB; 450 out->w = (oA - iA) < 0 ? 0 : oA - iA; 451 } 452 break; 453 case BLEND_STAMP: 454 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP"); 455 rsAssert(false); 456 break; 457 case BLEND_RED: 458 ALOGE("Called unimplemented blend intrinsic BLEND_RED"); 459 rsAssert(false); 460 break; 461 case BLEND_GREEN: 462 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN"); 463 rsAssert(false); 464 break; 465 case BLEND_BLUE: 466 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE"); 467 rsAssert(false); 468 break; 469 case BLEND_HUE: 470 ALOGE("Called unimplemented blend intrinsic BLEND_HUE"); 471 rsAssert(false); 472 break; 473 case BLEND_SATURATION: 474 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION"); 475 rsAssert(false); 476 break; 477 case BLEND_COLOR: 478 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR"); 479 rsAssert(false); 480 break; 481 case BLEND_LUMINOSITY: 482 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY"); 483 rsAssert(false); 484 break; 485 486 default: 487 ALOGE("Called unimplemented value %d", p->slot); 488 rsAssert(false); 489 490 } 491 } 492 493 494 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, 495 const Script *s, const Element *e) 496 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) { 497 498 mRootPtr = &kernel; 499 } 500 501 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() { 502 } 503 504 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) { 505 s->mHal.info.exportedVariableCount = 0; 506 } 507 508 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, 509 const Script *s, const Element *e) { 510 return new RsdCpuScriptIntrinsicBlend(ctx, s, e); 511 } 512 513 514 515