1 2 /* 3 * Copyright 2006 The Android Open Source Project 4 * 5 * Use of this source code is governed by a BSD-style license that can be 6 * found in the LICENSE file. 7 */ 8 9 10 #include "SkBlurMask.h" 11 #include "SkMath.h" 12 #include "SkTemplates.h" 13 #include "SkEndian.h" 14 15 16 // This constant approximates the scaling done in the software path's 17 // "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)). 18 // IMHO, it actually should be 1: we blur "less" than we should do 19 // according to the CSS and canvas specs, simply because Safari does the same. 20 // Firefox used to do the same too, until 4.0 where they fixed it. So at some 21 // point we should probably get rid of these scaling constants and rebaseline 22 // all the blur tests. 23 static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f; 24 25 SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) { 26 return radius > 0 ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f; 27 } 28 29 SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) { 30 return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f; 31 } 32 33 #define UNROLL_SEPARABLE_LOOPS 34 35 /** 36 * This function performs a box blur in X, of the given radius. If the 37 * "transpose" parameter is true, it will transpose the pixels on write, 38 * such that X and Y are swapped. Reads are always performed from contiguous 39 * memory in X, for speed. The destination buffer (dst) must be at least 40 * (width + leftRadius + rightRadius) * height bytes in size. 41 * 42 * This is what the inner loop looks like before unrolling, and with the two 43 * cases broken out separately (width < diameter, width >= diameter): 44 * 45 * if (width < diameter) { 46 * for (int x = 0; x < width; ++x) { 47 * sum += *right++; 48 * *dptr = (sum * scale + half) >> 24; 49 * dptr += dst_x_stride; 50 * } 51 * for (int x = width; x < diameter; ++x) { 52 * *dptr = (sum * scale + half) >> 24; 53 * dptr += dst_x_stride; 54 * } 55 * for (int x = 0; x < width; ++x) { 56 * *dptr = (sum * scale + half) >> 24; 57 * sum -= *left++; 58 * dptr += dst_x_stride; 59 * } 60 * } else { 61 * for (int x = 0; x < diameter; ++x) { 62 * sum += *right++; 63 * *dptr = (sum * scale + half) >> 24; 64 * dptr += dst_x_stride; 65 * } 66 * for (int x = diameter; x < width; ++x) { 67 * sum += *right++; 68 * *dptr = (sum * scale + half) >> 24; 69 * sum -= *left++; 70 * dptr += dst_x_stride; 71 * } 72 * for (int x = 0; x < diameter; ++x) { 73 * *dptr = (sum * scale + half) >> 24; 74 * sum -= *left++; 75 * dptr += dst_x_stride; 76 * } 77 * } 78 */ 79 static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst, 80 int leftRadius, int rightRadius, int width, int height, 81 bool transpose) 82 { 83 int diameter = leftRadius + rightRadius; 84 int kernelSize = diameter + 1; 85 int border = SkMin32(width, diameter); 86 uint32_t scale = (1 << 24) / kernelSize; 87 int new_width = width + SkMax32(leftRadius, rightRadius) * 2; 88 int dst_x_stride = transpose ? height : 1; 89 int dst_y_stride = transpose ? 1 : new_width; 90 uint32_t half = 1 << 23; 91 for (int y = 0; y < height; ++y) { 92 uint32_t sum = 0; 93 uint8_t* dptr = dst + y * dst_y_stride; 94 const uint8_t* right = src + y * src_y_stride; 95 const uint8_t* left = right; 96 for (int x = 0; x < rightRadius - leftRadius; x++) { 97 *dptr = 0; 98 dptr += dst_x_stride; 99 } 100 #define LEFT_BORDER_ITER \ 101 sum += *right++; \ 102 *dptr = (sum * scale + half) >> 24; \ 103 dptr += dst_x_stride; 104 105 int x = 0; 106 #ifdef UNROLL_SEPARABLE_LOOPS 107 for (; x < border - 16; x += 16) { 108 LEFT_BORDER_ITER 109 LEFT_BORDER_ITER 110 LEFT_BORDER_ITER 111 LEFT_BORDER_ITER 112 LEFT_BORDER_ITER 113 LEFT_BORDER_ITER 114 LEFT_BORDER_ITER 115 LEFT_BORDER_ITER 116 LEFT_BORDER_ITER 117 LEFT_BORDER_ITER 118 LEFT_BORDER_ITER 119 LEFT_BORDER_ITER 120 LEFT_BORDER_ITER 121 LEFT_BORDER_ITER 122 LEFT_BORDER_ITER 123 LEFT_BORDER_ITER 124 } 125 #endif 126 for (; x < border; ++x) { 127 LEFT_BORDER_ITER 128 } 129 #undef LEFT_BORDER_ITER 130 #define TRIVIAL_ITER \ 131 *dptr = (sum * scale + half) >> 24; \ 132 dptr += dst_x_stride; 133 x = width; 134 #ifdef UNROLL_SEPARABLE_LOOPS 135 for (; x < diameter - 16; x += 16) { 136 TRIVIAL_ITER 137 TRIVIAL_ITER 138 TRIVIAL_ITER 139 TRIVIAL_ITER 140 TRIVIAL_ITER 141 TRIVIAL_ITER 142 TRIVIAL_ITER 143 TRIVIAL_ITER 144 TRIVIAL_ITER 145 TRIVIAL_ITER 146 TRIVIAL_ITER 147 TRIVIAL_ITER 148 TRIVIAL_ITER 149 TRIVIAL_ITER 150 TRIVIAL_ITER 151 TRIVIAL_ITER 152 } 153 #endif 154 for (; x < diameter; ++x) { 155 TRIVIAL_ITER 156 } 157 #undef TRIVIAL_ITER 158 #define CENTER_ITER \ 159 sum += *right++; \ 160 *dptr = (sum * scale + half) >> 24; \ 161 sum -= *left++; \ 162 dptr += dst_x_stride; 163 164 x = diameter; 165 #ifdef UNROLL_SEPARABLE_LOOPS 166 for (; x < width - 16; x += 16) { 167 CENTER_ITER 168 CENTER_ITER 169 CENTER_ITER 170 CENTER_ITER 171 CENTER_ITER 172 CENTER_ITER 173 CENTER_ITER 174 CENTER_ITER 175 CENTER_ITER 176 CENTER_ITER 177 CENTER_ITER 178 CENTER_ITER 179 CENTER_ITER 180 CENTER_ITER 181 CENTER_ITER 182 CENTER_ITER 183 } 184 #endif 185 for (; x < width; ++x) { 186 CENTER_ITER 187 } 188 #undef CENTER_ITER 189 #define RIGHT_BORDER_ITER \ 190 *dptr = (sum * scale + half) >> 24; \ 191 sum -= *left++; \ 192 dptr += dst_x_stride; 193 194 x = 0; 195 #ifdef UNROLL_SEPARABLE_LOOPS 196 for (; x < border - 16; x += 16) { 197 RIGHT_BORDER_ITER 198 RIGHT_BORDER_ITER 199 RIGHT_BORDER_ITER 200 RIGHT_BORDER_ITER 201 RIGHT_BORDER_ITER 202 RIGHT_BORDER_ITER 203 RIGHT_BORDER_ITER 204 RIGHT_BORDER_ITER 205 RIGHT_BORDER_ITER 206 RIGHT_BORDER_ITER 207 RIGHT_BORDER_ITER 208 RIGHT_BORDER_ITER 209 RIGHT_BORDER_ITER 210 RIGHT_BORDER_ITER 211 RIGHT_BORDER_ITER 212 RIGHT_BORDER_ITER 213 } 214 #endif 215 for (; x < border; ++x) { 216 RIGHT_BORDER_ITER 217 } 218 #undef RIGHT_BORDER_ITER 219 for (int x = 0; x < leftRadius - rightRadius; ++x) { 220 *dptr = 0; 221 dptr += dst_x_stride; 222 } 223 SkASSERT(sum == 0); 224 } 225 return new_width; 226 } 227 228 /** 229 * This variant of the box blur handles blurring of non-integer radii. It 230 * keeps two running sums: an outer sum for the rounded-up kernel radius, and 231 * an inner sum for the rounded-down kernel radius. For each pixel, it linearly 232 * interpolates between them. In float this would be: 233 * outer_weight * outer_sum / kernelSize + 234 * (1.0 - outer_weight) * innerSum / (kernelSize - 2) 235 * 236 * This is what the inner loop looks like before unrolling, and with the two 237 * cases broken out separately (width < diameter, width >= diameter): 238 * 239 * if (width < diameter) { 240 * for (int x = 0; x < width; x++) { 241 * inner_sum = outer_sum; 242 * outer_sum += *right++; 243 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 244 * dptr += dst_x_stride; 245 * } 246 * for (int x = width; x < diameter; ++x) { 247 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 248 * dptr += dst_x_stride; 249 * } 250 * for (int x = 0; x < width; x++) { 251 * inner_sum = outer_sum - *left++; 252 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 253 * dptr += dst_x_stride; 254 * outer_sum = inner_sum; 255 * } 256 * } else { 257 * for (int x = 0; x < diameter; x++) { 258 * inner_sum = outer_sum; 259 * outer_sum += *right++; 260 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 261 * dptr += dst_x_stride; 262 * } 263 * for (int x = diameter; x < width; ++x) { 264 * inner_sum = outer_sum - *left; 265 * outer_sum += *right++; 266 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 267 * dptr += dst_x_stride; 268 * outer_sum -= *left++; 269 * } 270 * for (int x = 0; x < diameter; x++) { 271 * inner_sum = outer_sum - *left++; 272 * *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 273 * dptr += dst_x_stride; 274 * outer_sum = inner_sum; 275 * } 276 * } 277 * } 278 * return new_width; 279 */ 280 281 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst, 282 int radius, int width, int height, 283 bool transpose, uint8_t outer_weight) 284 { 285 int diameter = radius * 2; 286 int kernelSize = diameter + 1; 287 int border = SkMin32(width, diameter); 288 int inner_weight = 255 - outer_weight; 289 outer_weight += outer_weight >> 7; 290 inner_weight += inner_weight >> 7; 291 uint32_t outer_scale = (outer_weight << 16) / kernelSize; 292 uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2); 293 uint32_t half = 1 << 23; 294 int new_width = width + diameter; 295 int dst_x_stride = transpose ? height : 1; 296 int dst_y_stride = transpose ? 1 : new_width; 297 for (int y = 0; y < height; ++y) { 298 uint32_t outer_sum = 0, inner_sum = 0; 299 uint8_t* dptr = dst + y * dst_y_stride; 300 const uint8_t* right = src + y * src_y_stride; 301 const uint8_t* left = right; 302 int x = 0; 303 304 #define LEFT_BORDER_ITER \ 305 inner_sum = outer_sum; \ 306 outer_sum += *right++; \ 307 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 308 dptr += dst_x_stride; 309 310 #ifdef UNROLL_SEPARABLE_LOOPS 311 for (;x < border - 16; x += 16) { 312 LEFT_BORDER_ITER 313 LEFT_BORDER_ITER 314 LEFT_BORDER_ITER 315 LEFT_BORDER_ITER 316 LEFT_BORDER_ITER 317 LEFT_BORDER_ITER 318 LEFT_BORDER_ITER 319 LEFT_BORDER_ITER 320 LEFT_BORDER_ITER 321 LEFT_BORDER_ITER 322 LEFT_BORDER_ITER 323 LEFT_BORDER_ITER 324 LEFT_BORDER_ITER 325 LEFT_BORDER_ITER 326 LEFT_BORDER_ITER 327 LEFT_BORDER_ITER 328 } 329 #endif 330 331 for (;x < border; ++x) { 332 LEFT_BORDER_ITER 333 } 334 #undef LEFT_BORDER_ITER 335 for (int x = width; x < diameter; ++x) { 336 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; 337 dptr += dst_x_stride; 338 } 339 x = diameter; 340 341 #define CENTER_ITER \ 342 inner_sum = outer_sum - *left; \ 343 outer_sum += *right++; \ 344 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 345 dptr += dst_x_stride; \ 346 outer_sum -= *left++; 347 348 #ifdef UNROLL_SEPARABLE_LOOPS 349 for (; x < width - 16; x += 16) { 350 CENTER_ITER 351 CENTER_ITER 352 CENTER_ITER 353 CENTER_ITER 354 CENTER_ITER 355 CENTER_ITER 356 CENTER_ITER 357 CENTER_ITER 358 CENTER_ITER 359 CENTER_ITER 360 CENTER_ITER 361 CENTER_ITER 362 CENTER_ITER 363 CENTER_ITER 364 CENTER_ITER 365 CENTER_ITER 366 } 367 #endif 368 for (; x < width; ++x) { 369 CENTER_ITER 370 } 371 #undef CENTER_ITER 372 373 #define RIGHT_BORDER_ITER \ 374 inner_sum = outer_sum - *left++; \ 375 *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \ 376 dptr += dst_x_stride; \ 377 outer_sum = inner_sum; 378 379 x = 0; 380 #ifdef UNROLL_SEPARABLE_LOOPS 381 for (; x < border - 16; x += 16) { 382 RIGHT_BORDER_ITER 383 RIGHT_BORDER_ITER 384 RIGHT_BORDER_ITER 385 RIGHT_BORDER_ITER 386 RIGHT_BORDER_ITER 387 RIGHT_BORDER_ITER 388 RIGHT_BORDER_ITER 389 RIGHT_BORDER_ITER 390 RIGHT_BORDER_ITER 391 RIGHT_BORDER_ITER 392 RIGHT_BORDER_ITER 393 RIGHT_BORDER_ITER 394 RIGHT_BORDER_ITER 395 RIGHT_BORDER_ITER 396 RIGHT_BORDER_ITER 397 RIGHT_BORDER_ITER 398 } 399 #endif 400 for (; x < border; ++x) { 401 RIGHT_BORDER_ITER 402 } 403 #undef RIGHT_BORDER_ITER 404 SkASSERT(outer_sum == 0 && inner_sum == 0); 405 } 406 return new_width; 407 } 408 409 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius) 410 { 411 *loRadius = *hiRadius = SkScalarCeilToInt(passRadius); 412 if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) { 413 *loRadius = *hiRadius - 1; 414 } 415 } 416 417 #include "SkColorPriv.h" 418 419 static void merge_src_with_blur(uint8_t dst[], int dstRB, 420 const uint8_t src[], int srcRB, 421 const uint8_t blur[], int blurRB, 422 int sw, int sh) { 423 dstRB -= sw; 424 srcRB -= sw; 425 blurRB -= sw; 426 while (--sh >= 0) { 427 for (int x = sw - 1; x >= 0; --x) { 428 *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src))); 429 dst += 1; 430 src += 1; 431 blur += 1; 432 } 433 dst += dstRB; 434 src += srcRB; 435 blur += blurRB; 436 } 437 } 438 439 static void clamp_with_orig(uint8_t dst[], int dstRowBytes, 440 const uint8_t src[], int srcRowBytes, 441 int sw, int sh, 442 SkBlurStyle style) { 443 int x; 444 while (--sh >= 0) { 445 switch (style) { 446 case kSolid_SkBlurStyle: 447 for (x = sw - 1; x >= 0; --x) { 448 int s = *src; 449 int d = *dst; 450 *dst = SkToU8(s + d - SkMulDiv255Round(s, d)); 451 dst += 1; 452 src += 1; 453 } 454 break; 455 case kOuter_SkBlurStyle: 456 for (x = sw - 1; x >= 0; --x) { 457 if (*src) { 458 *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src))); 459 } 460 dst += 1; 461 src += 1; 462 } 463 break; 464 default: 465 SkDEBUGFAIL("Unexpected blur style here"); 466 break; 467 } 468 dst += dstRowBytes - sw; 469 src += srcRowBytes - sw; 470 } 471 } 472 473 /////////////////////////////////////////////////////////////////////////////// 474 475 // we use a local function to wrap the class static method to work around 476 // a bug in gcc98 477 void SkMask_FreeImage(uint8_t* image); 478 void SkMask_FreeImage(uint8_t* image) { 479 SkMask::FreeImage(image); 480 } 481 482 bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src, 483 SkScalar sigma, SkBlurStyle style, SkBlurQuality quality, 484 SkIPoint* margin, bool force_quality) { 485 486 if (src.fFormat != SkMask::kA8_Format) { 487 return false; 488 } 489 490 // Force high quality off for small radii (performance) 491 if (!force_quality && sigma <= SkIntToScalar(2)) { 492 quality = kLow_SkBlurQuality; 493 } 494 495 SkScalar passRadius; 496 if (kHigh_SkBlurQuality == quality) { 497 // For the high quality path the 3 pass box blur kernel width is 498 // 6*rad+1 while the full Gaussian width is 6*sigma. 499 passRadius = sigma - (1/6.0f); 500 } else { 501 // For the low quality path we only attempt to cover 3*sigma of the 502 // Gaussian blur area (1.5*sigma on each side). The single pass box 503 // blur's kernel size is 2*rad+1. 504 passRadius = 1.5f*sigma - 0.5f; 505 } 506 507 // highQuality: use three box blur passes as a cheap way 508 // to approximate a Gaussian blur 509 int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1; 510 511 int rx = SkScalarCeilToInt(passRadius); 512 int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255); 513 514 SkASSERT(rx >= 0); 515 SkASSERT((unsigned)outerWeight <= 255); 516 if (rx <= 0) { 517 return false; 518 } 519 520 int ry = rx; // only do square blur for now 521 522 int padx = passCount * rx; 523 int pady = passCount * ry; 524 525 if (margin) { 526 margin->set(padx, pady); 527 } 528 dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady, 529 src.fBounds.fRight + padx, src.fBounds.fBottom + pady); 530 531 dst->fRowBytes = dst->fBounds.width(); 532 dst->fFormat = SkMask::kA8_Format; 533 dst->fImage = nullptr; 534 535 if (src.fImage) { 536 size_t dstSize = dst->computeImageSize(); 537 if (0 == dstSize) { 538 return false; // too big to allocate, abort 539 } 540 541 int sw = src.fBounds.width(); 542 int sh = src.fBounds.height(); 543 const uint8_t* sp = src.fImage; 544 uint8_t* dp = SkMask::AllocImage(dstSize); 545 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp); 546 547 // build the blurry destination 548 SkAutoTMalloc<uint8_t> tmpBuffer(dstSize); 549 uint8_t* tp = tmpBuffer.get(); 550 int w = sw, h = sh; 551 552 if (outerWeight == 255) { 553 int loRadius, hiRadius; 554 get_adjusted_radii(passRadius, &loRadius, &hiRadius); 555 if (kHigh_SkBlurQuality == quality) { 556 // Do three X blurs, with a transpose on the final one. 557 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false); 558 w = boxBlur(tp, w, dp, hiRadius, loRadius, w, h, false); 559 w = boxBlur(dp, w, tp, hiRadius, hiRadius, w, h, true); 560 // Do three Y blurs, with a transpose on the final one. 561 h = boxBlur(tp, h, dp, loRadius, hiRadius, h, w, false); 562 h = boxBlur(dp, h, tp, hiRadius, loRadius, h, w, false); 563 h = boxBlur(tp, h, dp, hiRadius, hiRadius, h, w, true); 564 } else { 565 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true); 566 h = boxBlur(tp, h, dp, ry, ry, h, w, true); 567 } 568 } else { 569 if (kHigh_SkBlurQuality == quality) { 570 // Do three X blurs, with a transpose on the final one. 571 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight); 572 w = boxBlurInterp(tp, w, dp, rx, w, h, false, outerWeight); 573 w = boxBlurInterp(dp, w, tp, rx, w, h, true, outerWeight); 574 // Do three Y blurs, with a transpose on the final one. 575 h = boxBlurInterp(tp, h, dp, ry, h, w, false, outerWeight); 576 h = boxBlurInterp(dp, h, tp, ry, h, w, false, outerWeight); 577 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 578 } else { 579 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight); 580 h = boxBlurInterp(tp, h, dp, ry, h, w, true, outerWeight); 581 } 582 } 583 584 dst->fImage = dp; 585 // if need be, alloc the "real" dst (same size as src) and copy/merge 586 // the blur into it (applying the src) 587 if (style == kInner_SkBlurStyle) { 588 // now we allocate the "real" dst, mirror the size of src 589 size_t srcSize = src.computeImageSize(); 590 if (0 == srcSize) { 591 return false; // too big to allocate, abort 592 } 593 dst->fImage = SkMask::AllocImage(srcSize); 594 merge_src_with_blur(dst->fImage, src.fRowBytes, 595 sp, src.fRowBytes, 596 dp + passCount * (rx + ry * dst->fRowBytes), 597 dst->fRowBytes, sw, sh); 598 SkMask::FreeImage(dp); 599 } else if (style != kNormal_SkBlurStyle) { 600 clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes), 601 dst->fRowBytes, sp, src.fRowBytes, sw, sh, style); 602 } 603 (void)autoCall.detach(); 604 } 605 606 if (style == kInner_SkBlurStyle) { 607 dst->fBounds = src.fBounds; // restore trimmed bounds 608 dst->fRowBytes = src.fRowBytes; 609 } 610 611 return true; 612 } 613 614 /* Convolving a box with itself three times results in a piecewise 615 quadratic function: 616 617 0 x <= -1.5 618 9/8 + 3/2 x + 1/2 x^2 -1.5 < x <= -.5 619 3/4 - x^2 -.5 < x <= .5 620 9/8 - 3/2 x + 1/2 x^2 0.5 < x <= 1.5 621 0 1.5 < x 622 623 Mathematica: 624 625 g[x_] := Piecewise [ { 626 {9/8 + 3/2 x + 1/2 x^2 , -1.5 < x <= -.5}, 627 {3/4 - x^2 , -.5 < x <= .5}, 628 {9/8 - 3/2 x + 1/2 x^2 , 0.5 < x <= 1.5} 629 }, 0] 630 631 To get the profile curve of the blurred step function at the rectangle 632 edge, we evaluate the indefinite integral, which is piecewise cubic: 633 634 0 x <= -1.5 635 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3 -1.5 < x <= -0.5 636 1/2 + 3/4 x - 1/3 x^3 -.5 < x <= .5 637 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3 .5 < x <= 1.5 638 1 1.5 < x 639 640 in Mathematica code: 641 642 gi[x_] := Piecewise[ { 643 { 0 , x <= -1.5 }, 644 { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 }, 645 { 1/2 + 3/4 x - 1/3 x^3 , -.5 < x <= .5}, 646 { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3, .5 < x <= 1.5} 647 },1] 648 */ 649 650 static float gaussianIntegral(float x) { 651 if (x > 1.5f) { 652 return 0.0f; 653 } 654 if (x < -1.5f) { 655 return 1.0f; 656 } 657 658 float x2 = x*x; 659 float x3 = x2*x; 660 661 if ( x > 0.5f ) { 662 return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x); 663 } 664 if ( x > -0.5f ) { 665 return 0.5f - (0.75f * x - x3 / 3.0f); 666 } 667 return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x); 668 } 669 670 /* ComputeBlurProfile allocates and fills in an array of floating 671 point values between 0 and 255 for the profile signature of 672 a blurred half-plane with the given blur radius. Since we're 673 going to be doing screened multiplications (i.e., 1 - (1-x)(1-y)) 674 all the time, we actually fill in the profile pre-inverted 675 (already done 255-x). 676 677 It's the responsibility of the caller to delete the 678 memory returned in profile_out. 679 */ 680 681 uint8_t* SkBlurMask::ComputeBlurProfile(SkScalar sigma) { 682 int size = SkScalarCeilToInt(6*sigma); 683 684 int center = size >> 1; 685 uint8_t* profile = new uint8_t[size]; 686 687 float invr = 1.f/(2*sigma); 688 689 profile[0] = 255; 690 for (int x = 1 ; x < size ; ++x) { 691 float scaled_x = (center - x - .5f) * invr; 692 float gi = gaussianIntegral(scaled_x); 693 profile[x] = 255 - (uint8_t) (255.f * gi); 694 } 695 696 return profile; 697 } 698 699 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for 700 // commonly used radii. Consider baking some of the most common blur radii 701 // directly in as static data? 702 703 // Implementation adapted from Michael Herf's approach: 704 // http://stereopsis.com/shadowrect/ 705 706 uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) { 707 int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge? 708 int ox = dx >> 1; 709 if (ox < 0) { 710 ox = 0; 711 } 712 713 return profile[ox]; 714 } 715 716 void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile, 717 unsigned int width, SkScalar sigma) { 718 719 unsigned int profile_size = SkScalarCeilToInt(6*sigma); 720 SkAutoTMalloc<uint8_t> horizontalScanline(width); 721 722 unsigned int sw = width - profile_size; 723 // nearest odd number less than the profile size represents the center 724 // of the (2x scaled) profile 725 int center = ( profile_size & ~1 ) - 1; 726 727 int w = sw - center; 728 729 for (unsigned int x = 0 ; x < width ; ++x) { 730 if (profile_size <= sw) { 731 pixels[x] = ProfileLookup(profile, x, width, w); 732 } else { 733 float span = float(sw)/(2*sigma); 734 float giX = 1.5f - (x+.5f)/(2*sigma); 735 pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span))); 736 } 737 } 738 } 739 740 bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst, 741 const SkRect &src, SkBlurStyle style, 742 SkIPoint *margin, SkMask::CreateMode createMode) { 743 int profile_size = SkScalarCeilToInt(6*sigma); 744 745 int pad = profile_size/2; 746 if (margin) { 747 margin->set( pad, pad ); 748 } 749 750 dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad), 751 SkScalarRoundToInt(src.fTop - pad), 752 SkScalarRoundToInt(src.fRight + pad), 753 SkScalarRoundToInt(src.fBottom + pad)); 754 755 dst->fRowBytes = dst->fBounds.width(); 756 dst->fFormat = SkMask::kA8_Format; 757 dst->fImage = nullptr; 758 759 int sw = SkScalarFloorToInt(src.width()); 760 int sh = SkScalarFloorToInt(src.height()); 761 762 if (createMode == SkMask::kJustComputeBounds_CreateMode) { 763 if (style == kInner_SkBlurStyle) { 764 dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 765 SkScalarRoundToInt(src.fTop), 766 SkScalarRoundToInt(src.fRight), 767 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 768 dst->fRowBytes = sw; 769 } 770 return true; 771 } 772 773 SkAutoTDeleteArray<uint8_t> profile(ComputeBlurProfile(sigma)); 774 775 size_t dstSize = dst->computeImageSize(); 776 if (0 == dstSize) { 777 return false; // too big to allocate, abort 778 } 779 780 uint8_t* dp = SkMask::AllocImage(dstSize); 781 782 dst->fImage = dp; 783 784 int dstHeight = dst->fBounds.height(); 785 int dstWidth = dst->fBounds.width(); 786 787 uint8_t *outptr = dp; 788 789 SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth); 790 SkAutoTMalloc<uint8_t> verticalScanline(dstHeight); 791 792 ComputeBlurredScanline(horizontalScanline, profile.get(), dstWidth, sigma); 793 ComputeBlurredScanline(verticalScanline, profile.get(), dstHeight, sigma); 794 795 for (int y = 0 ; y < dstHeight ; ++y) { 796 for (int x = 0 ; x < dstWidth ; x++) { 797 unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]); 798 *(outptr++) = maskval; 799 } 800 } 801 802 if (style == kInner_SkBlurStyle) { 803 // now we allocate the "real" dst, mirror the size of src 804 size_t srcSize = (size_t)(src.width() * src.height()); 805 if (0 == srcSize) { 806 return false; // too big to allocate, abort 807 } 808 dst->fImage = SkMask::AllocImage(srcSize); 809 for (int y = 0 ; y < sh ; y++) { 810 uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad; 811 uint8_t *inner_scanline = dst->fImage + y*sw; 812 memcpy(inner_scanline, blur_scanline, sw); 813 } 814 SkMask::FreeImage(dp); 815 816 dst->fBounds.set(SkScalarRoundToInt(src.fLeft), 817 SkScalarRoundToInt(src.fTop), 818 SkScalarRoundToInt(src.fRight), 819 SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds 820 dst->fRowBytes = sw; 821 822 } else if (style == kOuter_SkBlurStyle) { 823 for (int y = pad ; y < dstHeight-pad ; y++) { 824 uint8_t *dst_scanline = dp + y*dstWidth + pad; 825 memset(dst_scanline, 0, sw); 826 } 827 } else if (style == kSolid_SkBlurStyle) { 828 for (int y = pad ; y < dstHeight-pad ; y++) { 829 uint8_t *dst_scanline = dp + y*dstWidth + pad; 830 memset(dst_scanline, 0xff, sw); 831 } 832 } 833 // normal and solid styles are the same for analytic rect blurs, so don't 834 // need to handle solid specially. 835 836 return true; 837 } 838 839 bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst, 840 const SkRRect &src, SkBlurStyle style, 841 SkIPoint *margin, SkMask::CreateMode createMode) { 842 // Temporary for now -- always fail, should cause caller to fall back 843 // to old path. Plumbing just to land API and parallelize effort. 844 845 return false; 846 } 847 848 // The "simple" blur is a direct implementation of separable convolution with a discrete 849 // gaussian kernel. It's "ground truth" in a sense; too slow to be used, but very 850 // useful for correctness comparisons. 851 852 bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src, 853 SkBlurStyle style, SkIPoint* margin) { 854 855 if (src.fFormat != SkMask::kA8_Format) { 856 return false; 857 } 858 859 float variance = sigma * sigma; 860 861 int windowSize = SkScalarCeilToInt(sigma*6); 862 // round window size up to nearest odd number 863 windowSize |= 1; 864 865 SkAutoTMalloc<float> gaussWindow(windowSize); 866 867 int halfWindow = windowSize >> 1; 868 869 gaussWindow[halfWindow] = 1; 870 871 float windowSum = 1; 872 for (int x = 1 ; x <= halfWindow ; ++x) { 873 float gaussian = expf(-x*x / (2*variance)); 874 gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian; 875 windowSum += 2*gaussian; 876 } 877 878 // leave the filter un-normalized for now; we will divide by the normalization 879 // sum later; 880 881 int pad = halfWindow; 882 if (margin) { 883 margin->set( pad, pad ); 884 } 885 886 dst->fBounds = src.fBounds; 887 dst->fBounds.outset(pad, pad); 888 889 dst->fRowBytes = dst->fBounds.width(); 890 dst->fFormat = SkMask::kA8_Format; 891 dst->fImage = nullptr; 892 893 if (src.fImage) { 894 895 size_t dstSize = dst->computeImageSize(); 896 if (0 == dstSize) { 897 return false; // too big to allocate, abort 898 } 899 900 int srcWidth = src.fBounds.width(); 901 int srcHeight = src.fBounds.height(); 902 int dstWidth = dst->fBounds.width(); 903 904 const uint8_t* srcPixels = src.fImage; 905 uint8_t* dstPixels = SkMask::AllocImage(dstSize); 906 SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels); 907 908 // do the actual blur. First, make a padded copy of the source. 909 // use double pad so we never have to check if we're outside anything 910 911 int padWidth = srcWidth + 4*pad; 912 int padHeight = srcHeight; 913 int padSize = padWidth * padHeight; 914 915 SkAutoTMalloc<uint8_t> padPixels(padSize); 916 memset(padPixels, 0, padSize); 917 918 for (int y = 0 ; y < srcHeight; ++y) { 919 uint8_t* padptr = padPixels + y * padWidth + 2*pad; 920 const uint8_t* srcptr = srcPixels + y * srcWidth; 921 memcpy(padptr, srcptr, srcWidth); 922 } 923 924 // blur in X, transposing the result into a temporary floating point buffer. 925 // also double-pad the intermediate result so that the second blur doesn't 926 // have to do extra conditionals. 927 928 int tmpWidth = padHeight + 4*pad; 929 int tmpHeight = padWidth - 2*pad; 930 int tmpSize = tmpWidth * tmpHeight; 931 932 SkAutoTMalloc<float> tmpImage(tmpSize); 933 memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0])); 934 935 for (int y = 0 ; y < padHeight ; ++y) { 936 uint8_t *srcScanline = padPixels + y*padWidth; 937 for (int x = pad ; x < padWidth - pad ; ++x) { 938 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output 939 uint8_t *windowCenter = srcScanline + x; 940 for (int i = -pad ; i <= pad ; ++i) { 941 *outPixel += gaussWindow[pad+i]*windowCenter[i]; 942 } 943 *outPixel /= windowSum; 944 } 945 } 946 947 // blur in Y; now filling in the actual desired destination. We have to do 948 // the transpose again; these transposes guarantee that we read memory in 949 // linear order. 950 951 for (int y = 0 ; y < tmpHeight ; ++y) { 952 float *srcScanline = tmpImage + y*tmpWidth; 953 for (int x = pad ; x < tmpWidth - pad ; ++x) { 954 float *windowCenter = srcScanline + x; 955 float finalValue = 0; 956 for (int i = -pad ; i <= pad ; ++i) { 957 finalValue += gaussWindow[pad+i]*windowCenter[i]; 958 } 959 finalValue /= windowSum; 960 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output 961 int integerPixel = int(finalValue + 0.5f); 962 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 ); 963 } 964 } 965 966 dst->fImage = dstPixels; 967 // if need be, alloc the "real" dst (same size as src) and copy/merge 968 // the blur into it (applying the src) 969 if (style == kInner_SkBlurStyle) { 970 // now we allocate the "real" dst, mirror the size of src 971 size_t srcSize = src.computeImageSize(); 972 if (0 == srcSize) { 973 return false; // too big to allocate, abort 974 } 975 dst->fImage = SkMask::AllocImage(srcSize); 976 merge_src_with_blur(dst->fImage, src.fRowBytes, 977 srcPixels, src.fRowBytes, 978 dstPixels + pad*dst->fRowBytes + pad, 979 dst->fRowBytes, srcWidth, srcHeight); 980 SkMask::FreeImage(dstPixels); 981 } else if (style != kNormal_SkBlurStyle) { 982 clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad, 983 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style); 984 } 985 (void)autoCall.detach(); 986 } 987 988 if (style == kInner_SkBlurStyle) { 989 dst->fBounds = src.fBounds; // restore trimmed bounds 990 dst->fRowBytes = src.fRowBytes; 991 } 992 993 return true; 994 } 995