1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "rsCpuIntrinsic.h" 19 #include "rsCpuIntrinsicInlines.h" 20 21 using namespace android; 22 using namespace android::renderscript; 23 24 namespace android { 25 namespace renderscript { 26 27 28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic { 29 public: 30 void populateScript(Script *) override; 31 32 ~RsdCpuScriptIntrinsicBlend() override; 33 RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e); 34 35 protected: 36 static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart, 37 uint32_t xend, uint32_t outstep); 38 }; 39 40 } 41 } 42 43 44 enum { 45 BLEND_CLEAR = 0, 46 BLEND_SRC = 1, 47 BLEND_DST = 2, 48 BLEND_SRC_OVER = 3, 49 BLEND_DST_OVER = 4, 50 BLEND_SRC_IN = 5, 51 BLEND_DST_IN = 6, 52 BLEND_SRC_OUT = 7, 53 BLEND_DST_OUT = 8, 54 BLEND_SRC_ATOP = 9, 55 BLEND_DST_ATOP = 10, 56 BLEND_XOR = 11, 57 58 BLEND_NORMAL = 12, 59 BLEND_AVERAGE = 13, 60 BLEND_MULTIPLY = 14, 61 BLEND_SCREEN = 15, 62 BLEND_DARKEN = 16, 63 BLEND_LIGHTEN = 17, 64 BLEND_OVERLAY = 18, 65 BLEND_HARDLIGHT = 19, 66 BLEND_SOFTLIGHT = 20, 67 BLEND_DIFFERENCE = 21, 68 BLEND_NEGATION = 22, 69 BLEND_EXCLUSION = 23, 70 BLEND_COLOR_DODGE = 24, 71 BLEND_INVERSE_COLOR_DODGE = 25, 72 BLEND_SOFT_DODGE = 26, 73 BLEND_COLOR_BURN = 27, 74 BLEND_INVERSE_COLOR_BURN = 28, 75 BLEND_SOFT_BURN = 29, 76 BLEND_REFLECT = 30, 77 BLEND_GLOW = 31, 78 BLEND_FREEZE = 32, 79 BLEND_HEAT = 33, 80 BLEND_ADD = 34, 81 BLEND_SUBTRACT = 35, 82 BLEND_STAMP = 36, 83 BLEND_RED = 37, 84 BLEND_GREEN = 38, 85 BLEND_BLUE = 39, 86 BLEND_HUE = 40, 87 BLEND_SATURATION = 41, 88 BLEND_COLOR = 42, 89 BLEND_LUMINOSITY = 43 90 }; 91 92 #if defined(ARCH_ARM_USE_INTRINSICS) 93 extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot, 94 uint32_t xstart, uint32_t xend); 95 #endif 96 97 #if defined(ARCH_X86_HAVE_SSSE3) 98 extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8); 99 extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8); 100 extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8); 101 extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8); 102 extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8); 103 extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8); 104 extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8); 105 extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8); 106 extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8); 107 extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8); 108 extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8); 109 extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8); 110 #endif 111 112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info, 113 uint32_t xstart, uint32_t xend, 114 uint32_t outstep) { 115 RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr; 116 117 // instep/outstep can be ignored--sizeof(uchar4) known at compile time 118 uchar4 *out = (uchar4 *)info->outPtr[0]; 119 uchar4 *in = (uchar4 *)info->inPtr[0]; 120 uint32_t x1 = xstart; 121 uint32_t x2 = xend; 122 123 #if defined(ARCH_ARM_USE_INTRINSICS) 124 // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this 125 // been fixed. 126 if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) { 127 if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0) 128 return; 129 } 130 #endif 131 switch (info->slot) { 132 case BLEND_CLEAR: 133 for (;x1 < x2; x1++, out++) { 134 *out = 0; 135 } 136 break; 137 case BLEND_SRC: 138 for (;x1 < x2; x1++, out++, in++) { 139 *out = *in; 140 } 141 break; 142 //BLEND_DST is a NOP 143 case BLEND_DST: 144 break; 145 case BLEND_SRC_OVER: 146 #if defined(ARCH_X86_HAVE_SSSE3) 147 if (gArchUseSIMD) { 148 if ((x1 + 8) < x2) { 149 uint32_t len = (x2 - x1) >> 3; 150 rsdIntrinsicBlendSrcOver_K(out, in, len); 151 x1 += len << 3; 152 out += len << 3; 153 in += len << 3; 154 } 155 } 156 #endif 157 for (;x1 < x2; x1++, out++, in++) { 158 short4 in_s = convert_short4(*in); 159 short4 out_s = convert_short4(*out); 160 in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8); 161 *out = convert_uchar4(in_s); 162 } 163 break; 164 case BLEND_DST_OVER: 165 #if defined(ARCH_X86_HAVE_SSSE3) 166 if (gArchUseSIMD) { 167 if ((x1 + 8) < x2) { 168 uint32_t len = (x2 - x1) >> 3; 169 rsdIntrinsicBlendDstOver_K(out, in, len); 170 x1 += len << 3; 171 out += len << 3; 172 in += len << 3; 173 } 174 } 175 #endif 176 for (;x1 < x2; x1++, out++, in++) { 177 short4 in_s = convert_short4(*in); 178 short4 out_s = convert_short4(*out); 179 in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8); 180 *out = convert_uchar4(in_s); 181 } 182 break; 183 case BLEND_SRC_IN: 184 #if defined(ARCH_X86_HAVE_SSSE3) 185 if (gArchUseSIMD) { 186 if ((x1 + 8) < x2) { 187 uint32_t len = (x2 - x1) >> 3; 188 rsdIntrinsicBlendSrcIn_K(out, in, len); 189 x1 += len << 3; 190 out += len << 3; 191 in += len << 3; 192 } 193 } 194 #endif 195 for (;x1 < x2; x1++, out++, in++) { 196 short4 in_s = convert_short4(*in); 197 in_s = (in_s * out->w) >> (short4)8; 198 *out = convert_uchar4(in_s); 199 } 200 break; 201 case BLEND_DST_IN: 202 #if defined(ARCH_X86_HAVE_SSSE3) 203 if (gArchUseSIMD) { 204 if ((x1 + 8) < x2) { 205 uint32_t len = (x2 - x1) >> 3; 206 rsdIntrinsicBlendDstIn_K(out, in, len); 207 x1 += len << 3; 208 out += len << 3; 209 in += len << 3; 210 } 211 } 212 #endif 213 for (;x1 < x2; x1++, out++, in++) { 214 short4 out_s = convert_short4(*out); 215 out_s = (out_s * in->w) >> (short4)8; 216 *out = convert_uchar4(out_s); 217 } 218 break; 219 case BLEND_SRC_OUT: 220 #if defined(ARCH_X86_HAVE_SSSE3) 221 if (gArchUseSIMD) { 222 if ((x1 + 8) < x2) { 223 uint32_t len = (x2 - x1) >> 3; 224 rsdIntrinsicBlendSrcOut_K(out, in, len); 225 x1 += len << 3; 226 out += len << 3; 227 in += len << 3; 228 } 229 } 230 #endif 231 for (;x1 < x2; x1++, out++, in++) { 232 short4 in_s = convert_short4(*in); 233 in_s = (in_s * (short4)(255 - out->w)) >> (short4)8; 234 *out = convert_uchar4(in_s); 235 } 236 break; 237 case BLEND_DST_OUT: 238 #if defined(ARCH_X86_HAVE_SSSE3) 239 if (gArchUseSIMD) { 240 if ((x1 + 8) < x2) { 241 uint32_t len = (x2 - x1) >> 3; 242 rsdIntrinsicBlendDstOut_K(out, in, len); 243 x1 += len << 3; 244 out += len << 3; 245 in += len << 3; 246 } 247 } 248 #endif 249 for (;x1 < x2; x1++, out++, in++) { 250 short4 out_s = convert_short4(*out); 251 out_s = (out_s * (short4)(255 - in->w)) >> (short4)8; 252 *out = convert_uchar4(out_s); 253 } 254 break; 255 case BLEND_SRC_ATOP: 256 #if defined(ARCH_X86_HAVE_SSSE3) 257 if (gArchUseSIMD) { 258 if ((x1 + 8) < x2) { 259 uint32_t len = (x2 - x1) >> 3; 260 rsdIntrinsicBlendSrcAtop_K(out, in, len); 261 x1 += len << 3; 262 out += len << 3; 263 in += len << 3; 264 } 265 } 266 #endif 267 for (;x1 < x2; x1++, out++, in++) { 268 short4 in_s = convert_short4(*in); 269 short4 out_s = convert_short4(*out); 270 out_s.xyz = ((in_s.xyz * out_s.w) + 271 (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8; 272 *out = convert_uchar4(out_s); 273 } 274 break; 275 case BLEND_DST_ATOP: 276 #if defined(ARCH_X86_HAVE_SSSE3) 277 if (gArchUseSIMD) { 278 if ((x1 + 8) < x2) { 279 uint32_t len = (x2 - x1) >> 3; 280 rsdIntrinsicBlendDstAtop_K(out, in, len); 281 x1 += len << 3; 282 out += len << 3; 283 in += len << 3; 284 } 285 } 286 #endif 287 for (;x1 < x2; x1++, out++, in++) { 288 short4 in_s = convert_short4(*in); 289 short4 out_s = convert_short4(*out); 290 out_s.xyz = ((out_s.xyz * in_s.w) + 291 (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8; 292 out_s.w = in_s.w; 293 *out = convert_uchar4(out_s); 294 } 295 break; 296 case BLEND_XOR: 297 #if defined(ARCH_X86_HAVE_SSSE3) 298 if (gArchUseSIMD) { 299 if ((x1 + 8) < x2) { 300 uint32_t len = (x2 - x1) >> 3; 301 rsdIntrinsicBlendXor_K(out, in, len); 302 x1 += len << 3; 303 out += len << 3; 304 in += len << 3; 305 } 306 } 307 #endif 308 for (;x1 < x2; x1++, out++, in++) { 309 *out = *in ^ *out; 310 } 311 break; 312 case BLEND_NORMAL: 313 ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL"); 314 rsAssert(false); 315 break; 316 case BLEND_AVERAGE: 317 ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE"); 318 rsAssert(false); 319 break; 320 case BLEND_MULTIPLY: 321 #if defined(ARCH_X86_HAVE_SSSE3) 322 if (gArchUseSIMD) { 323 if ((x1 + 8) < x2) { 324 uint32_t len = (x2 - x1) >> 3; 325 rsdIntrinsicBlendMultiply_K(out, in, len); 326 x1 += len << 3; 327 out += len << 3; 328 in += len << 3; 329 } 330 } 331 #endif 332 for (;x1 < x2; x1++, out++, in++) { 333 *out = convert_uchar4((convert_short4(*in) * convert_short4(*out)) 334 >> (short4)8); 335 } 336 break; 337 case BLEND_SCREEN: 338 ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN"); 339 rsAssert(false); 340 break; 341 case BLEND_DARKEN: 342 ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN"); 343 rsAssert(false); 344 break; 345 case BLEND_LIGHTEN: 346 ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN"); 347 rsAssert(false); 348 break; 349 case BLEND_OVERLAY: 350 ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY"); 351 rsAssert(false); 352 break; 353 case BLEND_HARDLIGHT: 354 ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT"); 355 rsAssert(false); 356 break; 357 case BLEND_SOFTLIGHT: 358 ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT"); 359 rsAssert(false); 360 break; 361 case BLEND_DIFFERENCE: 362 ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE"); 363 rsAssert(false); 364 break; 365 case BLEND_NEGATION: 366 ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION"); 367 rsAssert(false); 368 break; 369 case BLEND_EXCLUSION: 370 ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION"); 371 rsAssert(false); 372 break; 373 case BLEND_COLOR_DODGE: 374 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE"); 375 rsAssert(false); 376 break; 377 case BLEND_INVERSE_COLOR_DODGE: 378 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE"); 379 rsAssert(false); 380 break; 381 case BLEND_SOFT_DODGE: 382 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE"); 383 rsAssert(false); 384 break; 385 case BLEND_COLOR_BURN: 386 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN"); 387 rsAssert(false); 388 break; 389 case BLEND_INVERSE_COLOR_BURN: 390 ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN"); 391 rsAssert(false); 392 break; 393 case BLEND_SOFT_BURN: 394 ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN"); 395 rsAssert(false); 396 break; 397 case BLEND_REFLECT: 398 ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT"); 399 rsAssert(false); 400 break; 401 case BLEND_GLOW: 402 ALOGE("Called unimplemented blend intrinsic BLEND_GLOW"); 403 rsAssert(false); 404 break; 405 case BLEND_FREEZE: 406 ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE"); 407 rsAssert(false); 408 break; 409 case BLEND_HEAT: 410 ALOGE("Called unimplemented blend intrinsic BLEND_HEAT"); 411 rsAssert(false); 412 break; 413 case BLEND_ADD: 414 #if defined(ARCH_X86_HAVE_SSSE3) 415 if (gArchUseSIMD) { 416 if((x1 + 8) < x2) { 417 uint32_t len = (x2 - x1) >> 3; 418 rsdIntrinsicBlendAdd_K(out, in, len); 419 x1 += len << 3; 420 out += len << 3; 421 in += len << 3; 422 } 423 } 424 #endif 425 for (;x1 < x2; x1++, out++, in++) { 426 uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, 427 oR = out->x, oG = out->y, oB = out->z, oA = out->w; 428 out->x = (oR + iR) > 255 ? 255 : oR + iR; 429 out->y = (oG + iG) > 255 ? 255 : oG + iG; 430 out->z = (oB + iB) > 255 ? 255 : oB + iB; 431 out->w = (oA + iA) > 255 ? 255 : oA + iA; 432 } 433 break; 434 case BLEND_SUBTRACT: 435 #if defined(ARCH_X86_HAVE_SSSE3) 436 if (gArchUseSIMD) { 437 if((x1 + 8) < x2) { 438 uint32_t len = (x2 - x1) >> 3; 439 rsdIntrinsicBlendSub_K(out, in, len); 440 x1 += len << 3; 441 out += len << 3; 442 in += len << 3; 443 } 444 } 445 #endif 446 for (;x1 < x2; x1++, out++, in++) { 447 int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w, 448 oR = out->x, oG = out->y, oB = out->z, oA = out->w; 449 out->x = (oR - iR) < 0 ? 0 : oR - iR; 450 out->y = (oG - iG) < 0 ? 0 : oG - iG; 451 out->z = (oB - iB) < 0 ? 0 : oB - iB; 452 out->w = (oA - iA) < 0 ? 0 : oA - iA; 453 } 454 break; 455 case BLEND_STAMP: 456 ALOGE("Called unimplemented blend intrinsic BLEND_STAMP"); 457 rsAssert(false); 458 break; 459 case BLEND_RED: 460 ALOGE("Called unimplemented blend intrinsic BLEND_RED"); 461 rsAssert(false); 462 break; 463 case BLEND_GREEN: 464 ALOGE("Called unimplemented blend intrinsic BLEND_GREEN"); 465 rsAssert(false); 466 break; 467 case BLEND_BLUE: 468 ALOGE("Called unimplemented blend intrinsic BLEND_BLUE"); 469 rsAssert(false); 470 break; 471 case BLEND_HUE: 472 ALOGE("Called unimplemented blend intrinsic BLEND_HUE"); 473 rsAssert(false); 474 break; 475 case BLEND_SATURATION: 476 ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION"); 477 rsAssert(false); 478 break; 479 case BLEND_COLOR: 480 ALOGE("Called unimplemented blend intrinsic BLEND_COLOR"); 481 rsAssert(false); 482 break; 483 case BLEND_LUMINOSITY: 484 ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY"); 485 rsAssert(false); 486 break; 487 488 default: 489 ALOGE("Called unimplemented value %d", info->slot); 490 rsAssert(false); 491 492 } 493 } 494 495 496 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, 497 const Script *s, const Element *e) 498 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) { 499 500 mRootPtr = &kernel; 501 } 502 503 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() { 504 } 505 506 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) { 507 s->mHal.info.exportedVariableCount = 0; 508 } 509 510 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, 511 const Script *s, const Element *e) { 512 return new RsdCpuScriptIntrinsicBlend(ctx, s, e); 513 } 514