1 2 /* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #include "SkBlurMask.h" 11 #include "SkMath.h" 12 #include "SkTemplates.h" 13 #include "SkEndian.h" 14 15 const SkScalar SkBlurMask::kBlurRadiusFudgeFactor = SkFloatToScalar(.57735f); 16 17 #define UNROLL_SEPARABLE_LOOPS 18 19 /** 20 * This function performs a box blur in X, of the given radius. If the 21 * "transpose" parameter is true, it will transpose the pixels on write, 22 * such that X and Y are swapped. Reads are always performed from contiguous 23 * memory in X, for speed. The destination buffer (dst) must be at least 24 * (width + leftRadius + rightRadius) * height bytes in size. 25 * 26 * This is what the inner loop looks like before unrolling, and with the two 27 * cases broken out separately (width < diameter, width >= diameter): 28 * 29 * if (width < diameter) { 30 * for (int x = 0; x < width; ++x) { 31 * sum += *right++; 32 * *dptr = (sum * scale + half) >> 24; 33 * dptr += dst_x_stride; 34 * } 35 * for (int x = width; x < diameter; ++x) { 36 * *dptr = (sum * scale + half) >> 24; 37 * dptr += dst_x_stride; 38 * } 39 * for (int x = 0; x < width; ++x) { 40 * *dptr = (sum * scale + half) >> 24; 41 * sum -= *left++; 42 * dptr += dst_x_stride; 43 * } 44 * } else { 45 * for (int x = 0; x < diameter; ++x) { 46 * sum += *right++; 47 * *dptr = (sum * scale + half) >> 24; 48 * dptr += dst_x_stride; 49 * } 50 * for (int x = diameter; x < width; ++x) { 51 * sum += *right++; 52 * *dptr = (sum * scale + half) >> 24; 53 * sum -= *left++; 54 * dptr += dst_x_stride; 55 * } 56 * for (int x = 0; x < diameter; ++x) { 57 * *dptr = (sum * scale + half) >> 24; 58 * sum -= *left++; 59 * dptr += dst_x_stride; 60 * } 61 * } 62 */ 63 static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst, 64 int leftRadius, int rightRadius, int width, int height, 65 bool transpose) 66 { 67 int diameter = leftRadius + rightRadius; 68 int kernelSize = diameter + 1; 69 int border = SkMin32(width, diameter); 70 uint32_t scale = (1 << 24) / kernelSize; 71 int new_width = width + SkMax32(leftRadius, rightRadius) * 2; 72 int dst_x_stride = transpose ? height : 1; 73 int dst_y_stride = transpose ? 1 : new_width; 74 #ifndef SK_DISABLE_BLUR_ROUNDING 75 uint32_t half = 1 << 23; 76 #else 77 uint32_t half = 0; 78 #endif 79 for (int y = 0; y < height; ++y) { 80 uint32_t sum = 0; 81 uint8_t* dptr = dst + y * dst_y_stride; 82 const uint8_t* right = src + y * src_y_stride; 83 const uint8_t* left = right; 84 for (int x = 0; x < rightRadius - leftRadius; x++) { 85 *dptr = 0; 86 dptr += dst_x_stride; 87 } 88 #define LEFT_BORDER_ITER \ 89 sum += *right++; \ 90 *dptr = (sum * scale + half) >> 24; \ 91 dptr += dst_x_stride; 92 93 int x = 0; 94 #ifdef UNROLL_SEPARABLE_LOOPS 95 for (; x < border - 16; x += 16) { 96 LEFT_BORDER_ITER 97 LEFT_BORDER_ITER 98 LEFT_BORDER_ITER 99 LEFT_BORDER_ITER 100 LEFT_BORDER_ITER 101 LEFT_BORDER_ITER 102 LEFT_BORDER_ITER 103 LEFT_BORDER_ITER 104 LEFT_BORDER_ITER 105 LEFT_BORDER_ITER 106 LEFT_BORDER_ITER 107 LEFT_BORDER_ITER 108 LEFT_BORDER_ITER 109 LEFT_BORDER_ITER 110 LEFT_BORDER_ITER 111 LEFT_BORDER_ITER 112 } 113 #endif 114 for (; x < border; ++x) { 115 LEFT_BORDER_ITER 116 } 117 #undef LEFT_BORDER_ITER 118 #define TRIVIAL_ITER \ 119 *dptr = (sum * scale + half) >> 24; \ 120 dptr += dst_x_stride; 121 x = width; 122 #ifdef UNROLL_SEPARABLE_LOOPS 123 for (; x < diameter - 16; x += 16) { 124 TRIVIAL_ITER 125 TRIVIAL_ITER 126 TRIVIAL_ITER 127 TRIVIAL_ITER 128 TRIVIAL_ITER 129 TRIVIAL_ITER 130 TRIVIAL_ITER 131 TRIVIAL_ITER 132 TRIVIAL_ITER 133 TRIVIAL_ITER 134 TRIVIAL_ITER 135 TRIVIAL_ITER 136 TRIVIAL_ITER 137 TRIVIAL_ITER 138 TRIVIAL_ITER 139 TRIVIAL_ITER 140 } 141 #endif 142 for (; x < diameter; ++x) { 143 TRIVIAL_ITER 144 } 145 #undef TRIVIAL_ITER 146 #define CENTER_ITER \ 147 sum += *right++; \ 148 *dptr = (sum * scale + half) >> 24; \ 149 sum -= *left++; \ 150 dptr += dst_x_stride; 151 152 x = diameter; 153 #ifdef UNROLL_SEPARABLE_LOOPS 154 for (; x < width - 16; x += 16) { 155 CENTER_ITER 156 CENTER_ITER 157 CENTER_ITER 158 CENTER_ITER 159 CENTER_ITER 160 CENTER_ITER 161 CENTER_ITER 162 CENTER_ITER 163 CENTER_ITER 164 CENTER_ITER 165 CENTER_ITER 166 CENTER_ITER 167 CENTER_ITER 168 CENTER_ITER 169 CENTER_ITER 170 CENTER_ITER 171 } 172 #endif 173 for (; x < width; ++x) { 174 CENTER_ITER 175 } 176 #undef CENTER_ITER 177 #define RIGHT_BORDER_ITER \ 178 *dptr = (sum * scale + half) >> 24; \ 179 sum -= *left++; \ 180 dptr += dst_x_stride; 181 182 x = 0; 183 #ifdef UNROLL_SEPARABLE_LOOPS 184 for (; x < border - 16; x += 16) { 185 RIGHT_BORDER_ITER 186 RIGHT_BORDER_ITER 187 RIGHT_BORDER_ITER 188 RIGHT_BORDER_ITER 189 RIGHT_BORDER_ITER 190 RIGHT_BORDER_ITER 191 RIGHT_BORDER_ITER 192 RIGHT_BORDER_ITER 193 RIGHT_BORDER_ITER 194 RIGHT_BORDER_ITER 195 RIGHT_BORDER_ITER 196 RIGHT_BORDER_ITER 197 RIGHT_BORDER_ITER 198 RIGHT_BORDER_ITER 199 RIGHT_BORDER_ITER 200 RIGHT_BORDER_ITER 201 } 202 #endif 203 for (; x < border; ++x) { 204 RIGHT_BORDER_ITER 205 } 206 #undef RIGHT_BORDER_ITER 207 for (int x = 0; x < leftRadius - rightRadius; ++x) { 208 *dptr = 0; 209 dptr += dst_x_stride; 210 } 211 SkASSERT(sum == 0); 212 } 213 return new_width; 214 } 215 216 /** 217 * This variant of the box blur handles blurring of non-integer radii. It 218 * keeps two running sums: an outer sum for the rounded-up kernel radius, and 219 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly 220 * interpolates between them. In float this would be: 221 * outer_weight * outer_sum / kernelSize + 222 * (1.0 - outer_weight) * innerSum / (kernelSize - 2) 223 * 224 * This is what the inner loop looks like before unrolling, and with the two 225 * cases broken out separately (width < diameter, width >= diameter): 226 * 227 * if (width < diameter) { 228 * for (int x = 0; x < width; x++) { 229 * inner_sum = outer_sum; 230 * outer_sum += *right++; 231 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 232 * dptr += dst_x_stride; 233 * } 234 * for (int x = width; x < diameter; ++x) { 235 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 236 * dptr += dst_x_stride; 237 * } 238 * for (int x = 0; x < width; x++) { 239 * inner_sum = outer_sum - *left++; 240 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 241 * dptr += dst_x_stride; 242 * outer_sum = inner_sum; 243 * } 244 * } else { 245 * for (int x = 0; x < diameter; x++) { 246 * inner_sum = outer_sum; 247 * outer_sum += *right++; 248 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 249 * dptr += dst_x_stride; 250 * } 251 * for (int x = diameter; x < width; ++x) { 252 * inner_sum = outer_sum - *left; 253 * outer_sum += *right++; 254 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 255 * dptr += dst_x_stride; 256 * outer_sum -= *left++; 257 * } 258 * for (int x = 0; x < diameter; x++) { 259 * inner_sum = outer_sum - *left++; 260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 261 * dptr += dst_x_stride; 262 * outer_sum = inner_sum; 263 * } 264 * } 265 * } 266 * return new_width; 267 */ 268 269 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst, 270 int radius, int width, int height, 271 bool transpose, uint8_t outer_weight) 272 { 273 int diameter = radius * 2; 274 int kernelSize = diameter + 1; 275 int border = SkMin32(width, diameter); 276 int inner_weight = 255 - outer_weight; 277 outer_weight += outer_weight >> 7; 278 inner_weight += inner_weight >> 7; 279 uint32_t outer_scale = (outer_weight << 16) / kernelSize; 280 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2); 281 #ifndef SK_DISABLE_BLUR_ROUNDING 282 uint32_t half = 1 << 23; 283 #else 284 uint32_t half = 0; 285 #endif 286 int new_width = width + diameter; 287 int dst_x_stride = transpose ? height : 1; 288 int dst_y_stride = transpose ? 1 : new_width; 289 for (int y = 0; y < height; ++y) { 290 uint32_t outer_sum = 0, inner_sum = 0; 291 uint8_t* dptr = dst + y * dst_y_stride; 292 const uint8_t* right = src + y * src_y_stride; 293 const uint8_t* left = right; 294 int x = 0; 295 296 #define LEFT_BORDER_ITER \ 297 inner_sum = outer_sum; \ 298 outer_sum += *right++; \ 299 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 300 dptr += dst_x_stride; 301 302 #ifdef UNROLL_SEPARABLE_LOOPS 303 for (;x < border - 16; x += 16) { 304 LEFT_BORDER_ITER 305 LEFT_BORDER_ITER 306 LEFT_BORDER_ITER 307 LEFT_BORDER_ITER 308 LEFT_BORDER_ITER 309 LEFT_BORDER_ITER 310 LEFT_BORDER_ITER 311 LEFT_BORDER_ITER 312 LEFT_BORDER_ITER 313 LEFT_BORDER_ITER 314 LEFT_BORDER_ITER 315 LEFT_BORDER_ITER 316 LEFT_BORDER_ITER 317 LEFT_BORDER_ITER 318 LEFT_BORDER_ITER 319 LEFT_BORDER_ITER 320 } 321 #endif 322 323 for (;x < border; ++x) { 324 LEFT_BORDER_ITER 325 } 326 #undef LEFT_BORDER_ITER 327 for (int x = width; x < diameter; ++x) { 328 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 329 dptr += dst_x_stride; 330 } 331 x = diameter; 332 333 #define CENTER_ITER \ 334 inner_sum = outer_sum - *left; \ 335 outer_sum += *right++; \ 336 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 337 dptr += dst_x_stride; \ 338 outer_sum -= *left++; 339 340 #ifdef UNROLL_SEPARABLE_LOOPS 341 for (; x < width - 16; x += 16) { 342 CENTER_ITER 343 CENTER_ITER 344 CENTER_ITER 345 CENTER_ITER 346 CENTER_ITER 347 CENTER_ITER 348 CENTER_ITER 349 CENTER_ITER 350 CENTER_ITER 351 CENTER_ITER 352 CENTER_ITER 353 CENTER_ITER 354 CENTER_ITER 355 CENTER_ITER 356 CENTER_ITER 357 CENTER_ITER 358 } 359 #endif 360 for (; x < width; ++x) { 361 CENTER_ITER 362 } 363 #undef CENTER_ITER 364 365 #define RIGHT_BORDER_ITER \ 366 inner_sum = outer_sum - *left++; \ 367 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 368 dptr += dst_x_stride; \ 369 outer_sum = inner_sum; 370 371 x = 0; 372 #ifdef UNROLL_SEPARABLE_LOOPS 373 for (; x < border - 16; x += 16) { 374 RIGHT_BORDER_ITER 375 RIGHT_BORDER_ITER 376 RIGHT_BORDER_ITER 377 RIGHT_BORDER_ITER 378 RIGHT_BORDER_ITER 379 RIGHT_BORDER_ITER 380 RIGHT_BORDER_ITER 381 RIGHT_BORDER_ITER 382 RIGHT_BORDER_ITER 383 RIGHT_BORDER_ITER 384 RIGHT_BORDER_ITER 385 RIGHT_BORDER_ITER 386 RIGHT_BORDER_ITER 387 RIGHT_BORDER_ITER 388 RIGHT_BORDER_ITER 389 RIGHT_BORDER_ITER 390 } 391 #endif 392 for (; x < border; ++x) { 393 RIGHT_BORDER_ITER 394 } 395 #undef RIGHT_BORDER_ITER 396 SkASSERT(outer_sum == 0 && inner_sum == 0); 397 } 398 return new_width; 399 } 400 401 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) 402 { 403 *loRadius = *hiRadius = SkScalarCeil(passRadius); 404 if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) { 405 *loRadius = *hiRadius - 1; 406 } 407 } 408 409 #include "SkColorPriv.h" 410 411 static void merge_src_with_blur(uint8_t dst[], int dstRB, 412 const uint8_t src[], int srcRB, 413 const uint8_t blur[], int blurRB, 414 int sw, int sh) { 415 dstRB -= sw; 416 srcRB -= sw; 417 blurRB -= sw; 418 while (--sh >= 0) { 419 for (int x = sw - 1; x >= 0; --x) { 420 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src))); 421 dst += 1; 422 src += 1; 423 blur += 1; 424 } 425 dst += dstRB; 426 src += srcRB; 427 blur += blurRB; 428 } 429 } 430 431 static void clamp_with_orig(uint8_t dst[], int dstRowBytes, 432 const uint8_t src[], int srcRowBytes, 433 int sw, int sh, 434 SkBlurMask::Style style) { 435 int x; 436 while (--sh >= 0) { 437 switch (style) { 438 case SkBlurMask::kSolid_Style: 439 for (x = sw - 1; x >= 0; --x) { 440 int s = *src; 441 int d = *dst; 442 *dst = SkToU8(s + d - SkMulDiv255Round(s, d)); 443 dst += 1; 444 src += 1; 445 } 446 break; 447 case SkBlurMask::kOuter_Style: 448 for (x = sw - 1; x >= 0; --x) { 449 if (*src) { 450 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src))); 451 } 452 dst += 1; 453 src += 1; 454 } 455 break; 456 default: 457 SkDEBUGFAIL("Unexpected blur style here"); 458 break; 459 } 460 dst += dstRowBytes - sw; 461 src += srcRowBytes - sw; 462 } 463 } 464 465 /////////////////////////////////////////////////////////////////////////////// 466 467 // we use a local function to wrap the class static method to work around 468 // a bug in gcc98 469 void SkMask_FreeImage(uint8_t* image); 470 void SkMask_FreeImage(uint8_t* image) { 471 SkMask::FreeImage(image); 472 } 473 474 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src, 475 SkScalar radius, Style style, Quality quality, 476 SkIPoint* margin) 477 { 478 479 if (src.fFormat != SkMask::kA8_Format) { 480 return false; 481 } 482 483 // Force high quality off for small radii (performance) 484 if (radius < SkIntToScalar(3)) { 485 quality = kLow_Quality; 486 } 487 488 // highQuality: use three box blur passes as a cheap way 489 // to approximate a Gaussian blur 490 int passCount = (kHigh_Quality == quality) ? 3 : 1; 491 SkScalar passRadius = (kHigh_Quality == quality) ? 492 SkScalarMul( radius, kBlurRadiusFudgeFactor): 493 radius; 494 495 int rx = SkScalarCeil(passRadius); 496 int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255); 497 498 SkASSERT(rx >= 0); 499 SkASSERT((unsigned)outerWeight <= 255); 500 if (rx <= 0) { 501 return false; 502 } 503 504 int ry = rx; // only do square blur for now 505 506 int padx = passCount * rx; 507 int pady = passCount * ry; 508 509 if (margin) { 510 margin->set(padx, pady); 511 } 512 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady, 513 src.fBounds.fRight + padx, src.fBounds.fBottom + pady); 514 515 dst->fRowBytes = dst->fBounds.width(); 516 dst->fFormat = SkMask::kA8_Format; 517 dst->fImage = NULL; 518 519 if (src.fImage) { 520 size_t dstSize = dst->computeImageSize(); 521 if (0 == dstSize) { 522 return false; // too big to allocate, abort 523 } 524 525 int sw = src.fBounds.width(); 526 int sh = src.fBounds.height(); 527 const uint8_t* sp = src.fImage; 528 uint8_t* dp = SkMask::AllocImage(dstSize); 529 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); 530 531 // build the blurry destination 532 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); 533 uint8_t* tp = tmpBuffer.get(); 534 int w = sw, h = sh; 535 536 if (outerWeight == 255) { 537 int loRadius, hiRadius; 538 get_adjusted_radii(passRadius, &loRadius, &hiRadius); 539 if (kHigh_Quality == quality) { 540 // Do three X blurs, with a transpose on the final one. 541 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); 542 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); 543 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); 544 // Do three Y blurs, with a transpose on the final one. 545 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); 546 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); 547 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); 548 } else { 549 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); 550 h = boxBlur(tp, h, dp, ry, ry, h, w, true); 551 } 552 } else { 553 if (kHigh_Quality == quality) { 554 // Do three X blurs, with a transpose on the final one. 555 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); 556 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); 557 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); 558 // Do three Y blurs, with a transpose on the final one. 559 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); 560 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); 561 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 562 } else { 563 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); 564 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 565 } 566 } 567 568 dst->fImage = dp; 569 // if need be, alloc the "real" dst (same size as src) and copy/merge 570 // the blur into it (applying the src) 571 if (style == kInner_Style) { 572 // now we allocate the "real" dst, mirror the size of src 573 size_t srcSize = src.computeImageSize(); 574 if (0 == srcSize) { 575 return false; // too big to allocate, abort 576 } 577 dst->fImage = SkMask::AllocImage(srcSize); 578 merge_src_with_blur(dst->fImage, src.fRowBytes, 579 sp, src.fRowBytes, 580 dp + passCount * (rx + ry * dst->fRowBytes), 581 dst->fRowBytes, sw, sh); 582 SkMask::FreeImage(dp); 583 } else if (style != kNormal_Style) { 584 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes), 585 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style); 586 } 587 (void)autoCall.detach(); 588 } 589 590 if (style == kInner_Style) { 591 dst->fBounds = src.fBounds; // restore trimmed bounds 592 dst->fRowBytes = src.fRowBytes; 593 } 594 595 return true; 596 } 597 598 /* Convolving a box with itself three times results in a piecewise 599 quadratic function: 600 601 0 x <= -1.5 602 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 603 3/4 - x^2 -.5 < x <= .5 604 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 605 0 1.5 < x 606 607 Mathematica: 608 609 g[x_] := Piecewise [ { 610 {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5}, 611 {3/4 - x^2 , -.5 < x <= .5}, 612 {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5} 613 }, 0] 614 615 To get the profile curve of the blurred step function at the rectangle 616 edge, we evaluate the indefinite integral, which is piecewise cubic: 617 618 0 x <= -1.5 619 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5 620 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5 621 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5 622 1 1.5 < x 623 624 in Mathematica code: 625 626 gi[x_] := Piecewise[ { 627 { 0 , x <= -1.5 }, 628 { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 }, 629 { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5}, 630 { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5} 631 },1] 632 */ 633 634 static float gaussianIntegral(float x) { 635 if (x > 1.5f) { 636 return 0.0f; 637 } 638 if (x < -1.5f) { 639 return 1.0f; 640 } 641 642 float x2 = x*x; 643 float x3 = x2*x; 644 645 if ( x > 0.5f ) { 646 return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x); 647 } 648 if ( x > -0.5f ) { 649 return 0.5f - (0.75f * x - x3 / 3.0f); 650 } 651 return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x); 652 } 653 654 // Compute the size of the array allocated for the profile. 655 656 static int compute_profile_size(SkScalar radius) { 657 return SkScalarRoundToInt(radius * 3); 658 659 } 660 661 /* compute_profile allocates and fills in an array of floating 662 point values between 0 and 255 for the profile signature of 663 a blurred half-plane with the given blur radius. Since we're 664 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y)) 665 all the time, we actually fill in the profile pre-inverted 666 (already done 255-x). 667 668 It's the responsibility of the caller to delete the 669 memory returned in profile_out. 670 */ 671 672 static void compute_profile(SkScalar radius, unsigned int **profile_out) { 673 int size = compute_profile_size(radius); 674 675 int center = size >> 1; 676 unsigned int *profile = SkNEW_ARRAY(unsigned int, size); 677 678 float invr = 1.f/radius; 679 680 profile[0] = 255; 681 for (int x = 1 ; x < size ; ++x) { 682 float scaled_x = (center - x - .5f) * invr; 683 float gi = gaussianIntegral(scaled_x); 684 profile[x] = 255 - (uint8_t) (255.f * gi); 685 } 686 687 *profile_out = profile; 688 } 689 690 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for 691 // commonly used radii. Consider baking some of the most common blur radii 692 // directly in as static data? 693 694 // Implementation adapted from Michael Herf's approach: 695 // http://stereopsis.com/shadowrect/ 696 697 static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) { 698 int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge? 699 int ox = dx >> 1; 700 if (ox < 0) { 701 ox = 0; 702 } 703 704 return profile[ox]; 705 } 706 707 bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src, 708 SkScalar provided_radius, Style style, 709 SkIPoint *margin, SkMask::CreateMode createMode) { 710 int profile_size; 711 712 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor)); 713 714 // adjust blur radius to match interpretation from boxfilter code 715 radius = (radius + .5f) * 2.f; 716 717 profile_size = compute_profile_size(radius); 718 719 int pad = profile_size/2; 720 if (margin) { 721 margin->set( pad, pad ); 722 } 723 724 dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad), 725 SkScalarRoundToInt(src.fTop - pad), 726 SkScalarRoundToInt(src.fRight + pad), 727 SkScalarRoundToInt(src.fBottom + pad)); 728 729 dst->fRowBytes = dst->fBounds.width(); 730 dst->fFormat = SkMask::kA8_Format; 731 dst->fImage = NULL; 732 733 int sw = SkScalarFloorToInt(src.width()); 734 int sh = SkScalarFloorToInt(src.height()); 735 736 if (createMode == SkMask::kJustComputeBounds_CreateMode) { 737 if (style == kInner_Style) { 738 dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 739 SkScalarRoundToInt(src.fTop), 740 SkScalarRoundToInt(src.fRight), 741 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 742 dst->fRowBytes = sw; 743 } 744 return true; 745 } 746 unsigned int *profile = NULL; 747 748 compute_profile(radius, &profile); 749 SkAutoTDeleteArray<unsigned int> ada(profile); 750 751 size_t dstSize = dst->computeImageSize(); 752 if (0 == dstSize) { 753 return false; // too big to allocate, abort 754 } 755 756 uint8_t* dp = SkMask::AllocImage(dstSize); 757 758 dst->fImage = dp; 759 760 int dstHeight = dst->fBounds.height(); 761 int dstWidth = dst->fBounds.width(); 762 763 // nearest odd number less than the profile size represents the center 764 // of the (2x scaled) profile 765 int center = ( profile_size & ~1 ) - 1; 766 767 int w = sw - center; 768 int h = sh - center; 769 770 uint8_t *outptr = dp; 771 772 SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth); 773 774 for (int x = 0 ; x < dstWidth ; ++x) { 775 if (profile_size <= sw) { 776 horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w); 777 } else { 778 float span = float(sw)/radius; 779 float giX = 1.5f - (x+.5f)/radius; 780 horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span))); 781 } 782 } 783 784 for (int y = 0 ; y < dstHeight ; ++y) { 785 unsigned int profile_y; 786 if (profile_size <= sh) { 787 profile_y = profile_lookup(profile, y, dstHeight, h); 788 } else { 789 float span = float(sh)/radius; 790 float giY = 1.5f - (y+.5f)/radius; 791 profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span))); 792 } 793 794 for (int x = 0 ; x < dstWidth ; x++) { 795 unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y); 796 *(outptr++) = maskval; 797 } 798 } 799 800 if (style == kInner_Style) { 801 // now we allocate the "real" dst, mirror the size of src 802 size_t srcSize = (size_t)(src.width() * src.height()); 803 if (0 == srcSize) { 804 return false; // too big to allocate, abort 805 } 806 dst->fImage = SkMask::AllocImage(srcSize); 807 for (int y = 0 ; y < sh ; y++) { 808 uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad; 809 uint8_t *inner_scanline = dst->fImage + y*sw; 810 memcpy(inner_scanline, blur_scanline, sw); 811 } 812 SkMask::FreeImage(dp); 813 814 dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 815 SkScalarRoundToInt(src.fTop), 816 SkScalarRoundToInt(src.fRight), 817 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 818 dst->fRowBytes = sw; 819 820 } else if (style == kOuter_Style) { 821 for (int y = pad ; y < dstHeight-pad ; y++) { 822 uint8_t *dst_scanline = dp + y*dstWidth + pad; 823 memset(dst_scanline, 0, sw); 824 } 825 } else if (style == kSolid_Style) { 826 for (int y = pad ; y < dstHeight-pad ; y++) { 827 uint8_t *dst_scanline = dp + y*dstWidth + pad; 828 memset(dst_scanline, 0xff, sw); 829 } 830 } 831 // normal and solid styles are the same for analytic rect blurs, so don't 832 // need to handle solid specially. 833 834 return true; 835 } 836 837 // The "simple" blur is a direct implementation of separable convolution with a discrete 838 // gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very 839 // useful for correctness comparisons. 840 841 bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius, 842 Style style, SkIPoint* margin) { 843 844 if (src.fFormat != SkMask::kA8_Format) { 845 return false; 846 } 847 848 float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor)); 849 float stddev = SkScalarToFloat(radius) /2.0f; 850 float variance = stddev * stddev; 851 852 int windowSize = SkScalarCeil(stddev*4); 853 // round window size up to nearest odd number 854 windowSize |= 1; 855 856 SkAutoTMalloc<float> gaussWindow(windowSize); 857 858 int halfWindow = windowSize >> 1; 859 860 gaussWindow[halfWindow] = 1; 861 862 float windowSum = 1; 863 for (int x = 1 ; x <= halfWindow ; ++x) { 864 float gaussian = expf(-x*x / variance); 865 gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian; 866 windowSum += 2*gaussian; 867 } 868 869 // leave the filter un-normalized for now; we will divide by the normalization 870 // sum later; 871 872 int pad = halfWindow; 873 if (margin) { 874 margin->set( pad, pad ); 875 } 876 877 dst->fBounds = src.fBounds; 878 dst->fBounds.outset(pad, pad); 879 880 dst->fRowBytes = dst->fBounds.width(); 881 dst->fFormat = SkMask::kA8_Format; 882 dst->fImage = NULL; 883 884 if (src.fImage) { 885 886 size_t dstSize = dst->computeImageSize(); 887 if (0 == dstSize) { 888 return false; // too big to allocate, abort 889 } 890 891 int srcWidth = src.fBounds.width(); 892 int srcHeight = src.fBounds.height(); 893 int dstWidth = dst->fBounds.width(); 894 895 const uint8_t* srcPixels = src.fImage; 896 uint8_t* dstPixels = SkMask::AllocImage(dstSize); 897 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels); 898 899 // do the actual blur. First, make a padded copy of the source. 900 // use double pad so we never have to check if we're outside anything 901 902 int padWidth = srcWidth + 4*pad; 903 int padHeight = srcHeight; 904 int padSize = padWidth * padHeight; 905 906 SkAutoTMalloc<uint8_t> padPixels(padSize); 907 memset(padPixels, 0, padSize); 908 909 for (int y = 0 ; y < srcHeight; ++y) { 910 uint8_t* padptr = padPixels + y * padWidth + 2*pad; 911 const uint8_t* srcptr = srcPixels + y * srcWidth; 912 memcpy(padptr, srcptr, srcWidth); 913 } 914 915 // blur in X, transposing the result into a temporary floating point buffer. 916 // also double-pad the intermediate result so that the second blur doesn't 917 // have to do extra conditionals. 918 919 int tmpWidth = padHeight + 4*pad; 920 int tmpHeight = padWidth - 2*pad; 921 int tmpSize = tmpWidth * tmpHeight; 922 923 SkAutoTMalloc<float> tmpImage(tmpSize); 924 memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0])); 925 926 for (int y = 0 ; y < padHeight ; ++y) { 927 uint8_t *srcScanline = padPixels + y*padWidth; 928 for (int x = pad ; x < padWidth - pad ; ++x) { 929 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output 930 uint8_t *windowCenter = srcScanline + x; 931 for (int i = -pad ; i <= pad ; ++i) { 932 *outPixel += gaussWindow[pad+i]*windowCenter[i]; 933 } 934 *outPixel /= windowSum; 935 } 936 } 937 938 // blur in Y; now filling in the actual desired destination. We have to do 939 // the transpose again; these transposes guarantee that we read memory in 940 // linear order. 941 942 for (int y = 0 ; y < tmpHeight ; ++y) { 943 float *srcScanline = tmpImage + y*tmpWidth; 944 for (int x = pad ; x < tmpWidth - pad ; ++x) { 945 float *windowCenter = srcScanline + x; 946 float finalValue = 0; 947 for (int i = -pad ; i <= pad ; ++i) { 948 finalValue += gaussWindow[pad+i]*windowCenter[i]; 949 } 950 finalValue /= windowSum; 951 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output 952 int integerPixel = int(finalValue + 0.5f); 953 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 ); 954 } 955 } 956 957 dst->fImage = dstPixels; 958 // if need be, alloc the "real" dst (same size as src) and copy/merge 959 // the blur into it (applying the src) 960 if (style == kInner_Style) { 961 // now we allocate the "real" dst, mirror the size of src 962 size_t srcSize = src.computeImageSize(); 963 if (0 == srcSize) { 964 return false; // too big to allocate, abort 965 } 966 dst->fImage = SkMask::AllocImage(srcSize); 967 merge_src_with_blur(dst->fImage, src.fRowBytes, 968 srcPixels, src.fRowBytes, 969 dstPixels + pad*dst->fRowBytes + pad, 970 dst->fRowBytes, srcWidth, srcHeight); 971 SkMask::FreeImage(dstPixels); 972 } else if (style != kNormal_Style) { 973 clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad, 974 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style); 975 } 976 (void)autoCall.detach(); 977 } 978 979 if (style == kInner_Style) { 980 dst->fBounds = src.fBounds; // restore trimmed bounds 981 dst->fRowBytes = src.fRowBytes; 982 } 983 984 return true; 985 } 986