Home | History | Annotate | Download | only in effects
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #include "SkBlurMask.h"
     11 #include "SkMath.h"
     12 #include "SkTemplates.h"
     13 #include "SkEndian.h"
     14 
     15 
     16 SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) {
     17     // This constant approximates the scaling done in the software path's
     18     // "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)).
     19     // IMHO, it actually should be 1:  we blur "less" than we should do
     20     // according to the CSS and canvas specs, simply because Safari does the same.
     21     // Firefox used to do the same too, until 4.0 where they fixed it.  So at some
     22     // point we should probably get rid of these scaling constants and rebaseline
     23     // all the blur tests.
     24     static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f;
     25 
     26     return radius ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f;
     27 }
     28 
     29 #define UNROLL_SEPARABLE_LOOPS
     30 
     31 /**
     32  * This function performs a box blur in X, of the given radius.  If the
     33  * "transpose" parameter is true, it will transpose the pixels on write,
     34  * such that X and Y are swapped. Reads are always performed from contiguous
 * memory in X, for speed. The destination buffer (dst) must be at least
 * (width + 2 * max(leftRadius, rightRadius)) * height bytes in size.
     37  *
     38  * This is what the inner loop looks like before unrolling, and with the two
     39  * cases broken out separately (width < diameter, width >= diameter):
     40  *
     41  *      if (width < diameter) {
     42  *          for (int x = 0; x < width; ++x) {
     43  *              sum += *right++;
     44  *              *dptr = (sum * scale + half) >> 24;
     45  *              dptr += dst_x_stride;
     46  *          }
     47  *          for (int x = width; x < diameter; ++x) {
     48  *              *dptr = (sum * scale + half) >> 24;
     49  *              dptr += dst_x_stride;
     50  *          }
     51  *          for (int x = 0; x < width; ++x) {
     52  *              *dptr = (sum * scale + half) >> 24;
     53  *              sum -= *left++;
     54  *              dptr += dst_x_stride;
     55  *          }
     56  *      } else {
     57  *          for (int x = 0; x < diameter; ++x) {
     58  *              sum += *right++;
     59  *              *dptr = (sum * scale + half) >> 24;
     60  *              dptr += dst_x_stride;
     61  *          }
     62  *          for (int x = diameter; x < width; ++x) {
     63  *              sum += *right++;
     64  *              *dptr = (sum * scale + half) >> 24;
     65  *              sum -= *left++;
     66  *              dptr += dst_x_stride;
     67  *          }
     68  *          for (int x = 0; x < diameter; ++x) {
     69  *              *dptr = (sum * scale + half) >> 24;
     70  *              sum -= *left++;
     71  *              dptr += dst_x_stride;
     72  *          }
     73  *      }
     74  */
     75 static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
     76                    int leftRadius, int rightRadius, int width, int height,
     77                    bool transpose)
     78 {
     79     int diameter = leftRadius + rightRadius;
     80     int kernelSize = diameter + 1;
     81     int border = SkMin32(width, diameter);
     82     uint32_t scale = (1 << 24) / kernelSize;
     83     int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
     84     int dst_x_stride = transpose ? height : 1;
     85     int dst_y_stride = transpose ? 1 : new_width;
     86     uint32_t half = 1 << 23;
     87     for (int y = 0; y < height; ++y) {
     88         uint32_t sum = 0;
     89         uint8_t* dptr = dst + y * dst_y_stride;
     90         const uint8_t* right = src + y * src_y_stride;
     91         const uint8_t* left = right;
     92         for (int x = 0; x < rightRadius - leftRadius; x++) {
     93             *dptr = 0;
     94             dptr += dst_x_stride;
     95         }
     96 #define LEFT_BORDER_ITER \
     97             sum += *right++; \
     98             *dptr = (sum * scale + half) >> 24; \
     99             dptr += dst_x_stride;
    100 
    101         int x = 0;
    102 #ifdef UNROLL_SEPARABLE_LOOPS
    103         for (; x < border - 16; x += 16) {
    104             LEFT_BORDER_ITER
    105             LEFT_BORDER_ITER
    106             LEFT_BORDER_ITER
    107             LEFT_BORDER_ITER
    108             LEFT_BORDER_ITER
    109             LEFT_BORDER_ITER
    110             LEFT_BORDER_ITER
    111             LEFT_BORDER_ITER
    112             LEFT_BORDER_ITER
    113             LEFT_BORDER_ITER
    114             LEFT_BORDER_ITER
    115             LEFT_BORDER_ITER
    116             LEFT_BORDER_ITER
    117             LEFT_BORDER_ITER
    118             LEFT_BORDER_ITER
    119             LEFT_BORDER_ITER
    120         }
    121 #endif
    122         for (; x < border; ++x) {
    123             LEFT_BORDER_ITER
    124         }
    125 #undef LEFT_BORDER_ITER
    126 #define TRIVIAL_ITER \
    127             *dptr = (sum * scale + half) >> 24; \
    128             dptr += dst_x_stride;
    129         x = width;
    130 #ifdef UNROLL_SEPARABLE_LOOPS
    131         for (; x < diameter - 16; x += 16) {
    132             TRIVIAL_ITER
    133             TRIVIAL_ITER
    134             TRIVIAL_ITER
    135             TRIVIAL_ITER
    136             TRIVIAL_ITER
    137             TRIVIAL_ITER
    138             TRIVIAL_ITER
    139             TRIVIAL_ITER
    140             TRIVIAL_ITER
    141             TRIVIAL_ITER
    142             TRIVIAL_ITER
    143             TRIVIAL_ITER
    144             TRIVIAL_ITER
    145             TRIVIAL_ITER
    146             TRIVIAL_ITER
    147             TRIVIAL_ITER
    148         }
    149 #endif
    150         for (; x < diameter; ++x) {
    151             TRIVIAL_ITER
    152         }
    153 #undef TRIVIAL_ITER
    154 #define CENTER_ITER \
    155             sum += *right++; \
    156             *dptr = (sum * scale + half) >> 24; \
    157             sum -= *left++; \
    158             dptr += dst_x_stride;
    159 
    160         x = diameter;
    161 #ifdef UNROLL_SEPARABLE_LOOPS
    162         for (; x < width - 16; x += 16) {
    163             CENTER_ITER
    164             CENTER_ITER
    165             CENTER_ITER
    166             CENTER_ITER
    167             CENTER_ITER
    168             CENTER_ITER
    169             CENTER_ITER
    170             CENTER_ITER
    171             CENTER_ITER
    172             CENTER_ITER
    173             CENTER_ITER
    174             CENTER_ITER
    175             CENTER_ITER
    176             CENTER_ITER
    177             CENTER_ITER
    178             CENTER_ITER
    179         }
    180 #endif
    181         for (; x < width; ++x) {
    182             CENTER_ITER
    183         }
    184 #undef CENTER_ITER
    185 #define RIGHT_BORDER_ITER \
    186             *dptr = (sum * scale + half) >> 24; \
    187             sum -= *left++; \
    188             dptr += dst_x_stride;
    189 
    190         x = 0;
    191 #ifdef UNROLL_SEPARABLE_LOOPS
    192         for (; x < border - 16; x += 16) {
    193             RIGHT_BORDER_ITER
    194             RIGHT_BORDER_ITER
    195             RIGHT_BORDER_ITER
    196             RIGHT_BORDER_ITER
    197             RIGHT_BORDER_ITER
    198             RIGHT_BORDER_ITER
    199             RIGHT_BORDER_ITER
    200             RIGHT_BORDER_ITER
    201             RIGHT_BORDER_ITER
    202             RIGHT_BORDER_ITER
    203             RIGHT_BORDER_ITER
    204             RIGHT_BORDER_ITER
    205             RIGHT_BORDER_ITER
    206             RIGHT_BORDER_ITER
    207             RIGHT_BORDER_ITER
    208             RIGHT_BORDER_ITER
    209         }
    210 #endif
    211         for (; x < border; ++x) {
    212             RIGHT_BORDER_ITER
    213         }
    214 #undef RIGHT_BORDER_ITER
    215         for (int x = 0; x < leftRadius - rightRadius; ++x) {
    216             *dptr = 0;
    217             dptr += dst_x_stride;
    218         }
    219         SkASSERT(sum == 0);
    220     }
    221     return new_width;
    222 }
    223 
    224 /**
    225  * This variant of the box blur handles blurring of non-integer radii.  It
    226  * keeps two running sums: an outer sum for the rounded-up kernel radius, and
    227  * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
    228  * interpolates between them.  In float this would be:
    229  *  outer_weight * outer_sum / kernelSize +
    230  *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
    231  *
    232  * This is what the inner loop looks like before unrolling, and with the two
    233  * cases broken out separately (width < diameter, width >= diameter):
    234  *
    235  *      if (width < diameter) {
    236  *          for (int x = 0; x < width; x++) {
    237  *              inner_sum = outer_sum;
    238  *              outer_sum += *right++;
    239  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    240  *              dptr += dst_x_stride;
    241  *          }
    242  *          for (int x = width; x < diameter; ++x) {
    243  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    244  *              dptr += dst_x_stride;
    245  *          }
    246  *          for (int x = 0; x < width; x++) {
    247  *              inner_sum = outer_sum - *left++;
    248  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    249  *              dptr += dst_x_stride;
    250  *              outer_sum = inner_sum;
    251  *          }
    252  *      } else {
    253  *          for (int x = 0; x < diameter; x++) {
    254  *              inner_sum = outer_sum;
    255  *              outer_sum += *right++;
    256  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    257  *              dptr += dst_x_stride;
    258  *          }
    259  *          for (int x = diameter; x < width; ++x) {
    260  *              inner_sum = outer_sum - *left;
    261  *              outer_sum += *right++;
    262  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    263  *              dptr += dst_x_stride;
    264  *              outer_sum -= *left++;
    265  *          }
    266  *          for (int x = 0; x < diameter; x++) {
    267  *              inner_sum = outer_sum - *left++;
    268  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    269  *              dptr += dst_x_stride;
    270  *              outer_sum = inner_sum;
    271  *          }
    272  *      }
    273  *  }
    274  *  return new_width;
    275  */
    276 
    277 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
    278                          int radius, int width, int height,
    279                          bool transpose, uint8_t outer_weight)
    280 {
    281     int diameter = radius * 2;
    282     int kernelSize = diameter + 1;
    283     int border = SkMin32(width, diameter);
    284     int inner_weight = 255 - outer_weight;
    285     outer_weight += outer_weight >> 7;
    286     inner_weight += inner_weight >> 7;
    287     uint32_t outer_scale = (outer_weight << 16) / kernelSize;
    288     uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
    289     uint32_t half = 1 << 23;
    290     int new_width = width + diameter;
    291     int dst_x_stride = transpose ? height : 1;
    292     int dst_y_stride = transpose ? 1 : new_width;
    293     for (int y = 0; y < height; ++y) {
    294         uint32_t outer_sum = 0, inner_sum = 0;
    295         uint8_t* dptr = dst + y * dst_y_stride;
    296         const uint8_t* right = src + y * src_y_stride;
    297         const uint8_t* left = right;
    298         int x = 0;
    299 
    300 #define LEFT_BORDER_ITER \
    301             inner_sum = outer_sum; \
    302             outer_sum += *right++; \
    303             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    304             dptr += dst_x_stride;
    305 
    306 #ifdef UNROLL_SEPARABLE_LOOPS
    307         for (;x < border - 16; x += 16) {
    308             LEFT_BORDER_ITER
    309             LEFT_BORDER_ITER
    310             LEFT_BORDER_ITER
    311             LEFT_BORDER_ITER
    312             LEFT_BORDER_ITER
    313             LEFT_BORDER_ITER
    314             LEFT_BORDER_ITER
    315             LEFT_BORDER_ITER
    316             LEFT_BORDER_ITER
    317             LEFT_BORDER_ITER
    318             LEFT_BORDER_ITER
    319             LEFT_BORDER_ITER
    320             LEFT_BORDER_ITER
    321             LEFT_BORDER_ITER
    322             LEFT_BORDER_ITER
    323             LEFT_BORDER_ITER
    324         }
    325 #endif
    326 
    327         for (;x < border; ++x) {
    328             LEFT_BORDER_ITER
    329         }
    330 #undef LEFT_BORDER_ITER
    331         for (int x = width; x < diameter; ++x) {
    332             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    333             dptr += dst_x_stride;
    334         }
    335         x = diameter;
    336 
    337 #define CENTER_ITER \
    338             inner_sum = outer_sum - *left; \
    339             outer_sum += *right++; \
    340             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    341             dptr += dst_x_stride; \
    342             outer_sum -= *left++;
    343 
    344 #ifdef UNROLL_SEPARABLE_LOOPS
    345         for (; x < width - 16; x += 16) {
    346             CENTER_ITER
    347             CENTER_ITER
    348             CENTER_ITER
    349             CENTER_ITER
    350             CENTER_ITER
    351             CENTER_ITER
    352             CENTER_ITER
    353             CENTER_ITER
    354             CENTER_ITER
    355             CENTER_ITER
    356             CENTER_ITER
    357             CENTER_ITER
    358             CENTER_ITER
    359             CENTER_ITER
    360             CENTER_ITER
    361             CENTER_ITER
    362         }
    363 #endif
    364         for (; x < width; ++x) {
    365             CENTER_ITER
    366         }
    367 #undef CENTER_ITER
    368 
    369         #define RIGHT_BORDER_ITER \
    370             inner_sum = outer_sum - *left++; \
    371             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    372             dptr += dst_x_stride; \
    373             outer_sum = inner_sum;
    374 
    375         x = 0;
    376 #ifdef UNROLL_SEPARABLE_LOOPS
    377         for (; x < border - 16; x += 16) {
    378             RIGHT_BORDER_ITER
    379             RIGHT_BORDER_ITER
    380             RIGHT_BORDER_ITER
    381             RIGHT_BORDER_ITER
    382             RIGHT_BORDER_ITER
    383             RIGHT_BORDER_ITER
    384             RIGHT_BORDER_ITER
    385             RIGHT_BORDER_ITER
    386             RIGHT_BORDER_ITER
    387             RIGHT_BORDER_ITER
    388             RIGHT_BORDER_ITER
    389             RIGHT_BORDER_ITER
    390             RIGHT_BORDER_ITER
    391             RIGHT_BORDER_ITER
    392             RIGHT_BORDER_ITER
    393             RIGHT_BORDER_ITER
    394         }
    395 #endif
    396         for (; x < border; ++x) {
    397             RIGHT_BORDER_ITER
    398         }
    399 #undef RIGHT_BORDER_ITER
    400         SkASSERT(outer_sum == 0 && inner_sum == 0);
    401     }
    402     return new_width;
    403 }
    404 
    405 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
    406 {
    407     *loRadius = *hiRadius = SkScalarCeil(passRadius);
    408     if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
    409         *loRadius = *hiRadius - 1;
    410     }
    411 }
    412 
    413 #include "SkColorPriv.h"
    414 
    415 static void merge_src_with_blur(uint8_t dst[], int dstRB,
    416                                 const uint8_t src[], int srcRB,
    417                                 const uint8_t blur[], int blurRB,
    418                                 int sw, int sh) {
    419     dstRB -= sw;
    420     srcRB -= sw;
    421     blurRB -= sw;
    422     while (--sh >= 0) {
    423         for (int x = sw - 1; x >= 0; --x) {
    424             *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
    425             dst += 1;
    426             src += 1;
    427             blur += 1;
    428         }
    429         dst += dstRB;
    430         src += srcRB;
    431         blur += blurRB;
    432     }
    433 }
    434 
    435 static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
    436                             const uint8_t src[], int srcRowBytes,
    437                             int sw, int sh,
    438                             SkBlurMask::Style style) {
    439     int x;
    440     while (--sh >= 0) {
    441         switch (style) {
    442         case SkBlurMask::kSolid_Style:
    443             for (x = sw - 1; x >= 0; --x) {
    444                 int s = *src;
    445                 int d = *dst;
    446                 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
    447                 dst += 1;
    448                 src += 1;
    449             }
    450             break;
    451         case SkBlurMask::kOuter_Style:
    452             for (x = sw - 1; x >= 0; --x) {
    453                 if (*src) {
    454                     *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
    455                 }
    456                 dst += 1;
    457                 src += 1;
    458             }
    459             break;
    460         default:
    461             SkDEBUGFAIL("Unexpected blur style here");
    462             break;
    463         }
    464         dst += dstRowBytes - sw;
    465         src += srcRowBytes - sw;
    466     }
    467 }
    468 
    469 ///////////////////////////////////////////////////////////////////////////////
    470 
// We use a local, free function to wrap the class static method
// SkMask::FreeImage to work around a bug in gcc98.  It is used in this file
// as the cleanup procedure of an SkAutoTCallVProc guard.
// The separate declaration directly precedes the definition.
void SkMask_FreeImage(uint8_t* image);
void SkMask_FreeImage(uint8_t* image) {
    // Forward straight to the class static.
    SkMask::FreeImage(image);
}
    477 
    478 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
    479                       SkScalar radius, Style style, Quality quality,
    480                       SkIPoint* margin) {
    481     return SkBlurMask::BoxBlur(dst, src,
    482                                SkBlurMask::ConvertRadiusToSigma(radius),
    483                                style, quality, margin);
    484 }
    485 
    486 bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
    487                          SkScalar sigma, Style style, Quality quality,
    488                          SkIPoint* margin) {
    489 
    490     if (src.fFormat != SkMask::kA8_Format) {
    491         return false;
    492     }
    493 
    494     // Force high quality off for small radii (performance)
    495     if (sigma <= SkIntToScalar(2)) {
    496         quality = kLow_Quality;
    497     }
    498 
    499     SkScalar passRadius;
    500     if (kHigh_Quality == quality) {
    501         // For the high quality path the 3 pass box blur kernel width is
    502         // 6*rad+1 while the full Gaussian width is 6*sigma.
    503         passRadius = sigma - (1/6.0f);
    504     } else {
    505         // For the low quality path we only attempt to cover 3*sigma of the
    506         // Gaussian blur area (1.5*sigma on each side). The single pass box
    507         // blur's kernel size is 2*rad+1.
    508         passRadius = 1.5f*sigma - 0.5f;
    509     }
    510 
    511     // highQuality: use three box blur passes as a cheap way
    512     // to approximate a Gaussian blur
    513     int passCount = (kHigh_Quality == quality) ? 3 : 1;
    514 
    515     int rx = SkScalarCeil(passRadius);
    516     int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
    517 
    518     SkASSERT(rx >= 0);
    519     SkASSERT((unsigned)outerWeight <= 255);
    520     if (rx <= 0) {
    521         return false;
    522     }
    523 
    524     int ry = rx;    // only do square blur for now
    525 
    526     int padx = passCount * rx;
    527     int pady = passCount * ry;
    528 
    529     if (margin) {
    530         margin->set(padx, pady);
    531     }
    532     dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
    533                      src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    534 
    535     dst->fRowBytes = dst->fBounds.width();
    536     dst->fFormat = SkMask::kA8_Format;
    537     dst->fImage = NULL;
    538 
    539     if (src.fImage) {
    540         size_t dstSize = dst->computeImageSize();
    541         if (0 == dstSize) {
    542             return false;   // too big to allocate, abort
    543         }
    544 
    545         int             sw = src.fBounds.width();
    546         int             sh = src.fBounds.height();
    547         const uint8_t*  sp = src.fImage;
    548         uint8_t*        dp = SkMask::AllocImage(dstSize);
    549         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
    550 
    551         // build the blurry destination
    552         SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
    553         uint8_t*                tp = tmpBuffer.get();
    554         int w = sw, h = sh;
    555 
    556         if (outerWeight == 255) {
    557             int loRadius, hiRadius;
    558             get_adjusted_radii(passRadius, &loRadius, &hiRadius);
    559             if (kHigh_Quality == quality) {
    560                 // Do three X blurs, with a transpose on the final one.
    561                 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
    562                 w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
    563                 w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
    564                 // Do three Y blurs, with a transpose on the final one.
    565                 h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
    566                 h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
    567                 h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
    568             } else {
    569                 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
    570                 h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
    571             }
    572         } else {
    573             if (kHigh_Quality == quality) {
    574                 // Do three X blurs, with a transpose on the final one.
    575                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
    576                 w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
    577                 w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
    578                 // Do three Y blurs, with a transpose on the final one.
    579                 h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
    580                 h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
    581                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    582             } else {
    583                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
    584                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    585             }
    586         }
    587 
    588         dst->fImage = dp;
    589         // if need be, alloc the "real" dst (same size as src) and copy/merge
    590         // the blur into it (applying the src)
    591         if (style == kInner_Style) {
    592             // now we allocate the "real" dst, mirror the size of src
    593             size_t srcSize = src.computeImageSize();
    594             if (0 == srcSize) {
    595                 return false;   // too big to allocate, abort
    596             }
    597             dst->fImage = SkMask::AllocImage(srcSize);
    598             merge_src_with_blur(dst->fImage, src.fRowBytes,
    599                                 sp, src.fRowBytes,
    600                                 dp + passCount * (rx + ry * dst->fRowBytes),
    601                                 dst->fRowBytes, sw, sh);
    602             SkMask::FreeImage(dp);
    603         } else if (style != kNormal_Style) {
    604             clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
    605                             dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
    606         }
    607         (void)autoCall.detach();
    608     }
    609 
    610     if (style == kInner_Style) {
    611         dst->fBounds = src.fBounds; // restore trimmed bounds
    612         dst->fRowBytes = src.fRowBytes;
    613     }
    614 
    615     return true;
    616 }
    617 
    618 /* Convolving a box with itself three times results in a piecewise
    619    quadratic function:
    620 
    621    0                              x <= -1.5
    622    9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
    623    3/4 - x^2                -.5 < x <= .5
    624    9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
    625    0                        1.5 < x
    626 
    627    Mathematica:
    628 
    629    g[x_] := Piecewise [ {
    630      {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
    631      {3/4 - x^2             ,   -.5 < x <= .5},
    632      {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
    633    }, 0]
    634 
    635    To get the profile curve of the blurred step function at the rectangle
    636    edge, we evaluate the indefinite integral, which is piecewise cubic:
    637 
    638    0                                        x <= -1.5
    639    9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
    640    1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
    641    7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
    642    1                                  1.5 < x
    643 
    644    in Mathematica code:
    645 
    646    gi[x_] := Piecewise[ {
    647      { 0 , x <= -1.5 },
    648      { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
    649      { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
    650      { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
    651    },1]
    652 */
    653 
    654 static float gaussianIntegral(float x) {
    655     if (x > 1.5f) {
    656         return 0.0f;
    657     }
    658     if (x < -1.5f) {
    659         return 1.0f;
    660     }
    661 
    662     float x2 = x*x;
    663     float x3 = x2*x;
    664 
    665     if ( x > 0.5f ) {
    666         return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x);
    667     }
    668     if ( x > -0.5f ) {
    669         return 0.5f - (0.75f * x - x3 / 3.0f);
    670     }
    671     return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x);
    672 }
    673 
    674 /*  compute_profile allocates and fills in an array of floating
    675     point values between 0 and 255 for the profile signature of
    676     a blurred half-plane with the given blur radius.  Since we're
    677     going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
    678     all the time, we actually fill in the profile pre-inverted
    679     (already done 255-x).
    680 
    681     It's the responsibility of the caller to delete the
    682     memory returned in profile_out.
    683 */
    684 
    685 static void compute_profile(SkScalar sigma, unsigned int **profile_out) {
    686     int size = SkScalarCeilToInt(6*sigma);
    687 
    688     int center = size >> 1;
    689     unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
    690 
    691     float invr = 1.f/(2*sigma);
    692 
    693     profile[0] = 255;
    694     for (int x = 1 ; x < size ; ++x) {
    695         float scaled_x = (center - x - .5f) * invr;
    696         float gi = gaussianIntegral(scaled_x);
    697         profile[x] = 255 - (uint8_t) (255.f * gi);
    698     }
    699 
    700     *profile_out = profile;
    701 }
    702 
    703 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for
    704 // commonly used radii.  Consider baking some of the most common blur radii
    705 // directly in as static data?
    706 
    707 // Implementation adapted from Michael Herf's approach:
    708 // http://stereopsis.com/shadowrect/
    709 
    710 static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) {
    711     int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
    712     int ox = dx >> 1;
    713     if (ox < 0) {
    714         ox = 0;
    715     }
    716 
    717     return profile[ox];
    718 }
    719 
    720 bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
    721                           SkScalar radius, Style style,
    722                           SkIPoint *margin, SkMask::CreateMode createMode) {
    723     return SkBlurMask::BlurRect(SkBlurMask::ConvertRadiusToSigma(radius),
    724                                 dst, src,
    725                                 style, margin, createMode);
    726 }
    727 
    728 bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst,
    729                           const SkRect &src, Style style,
    730                           SkIPoint *margin, SkMask::CreateMode createMode) {
    731     int profile_size = SkScalarCeilToInt(6*sigma);
    732 
    733     int pad = profile_size/2;
    734     if (margin) {
    735         margin->set( pad, pad );
    736     }
    737 
    738     dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
    739                      SkScalarRoundToInt(src.fTop - pad),
    740                      SkScalarRoundToInt(src.fRight + pad),
    741                      SkScalarRoundToInt(src.fBottom + pad));
    742 
    743     dst->fRowBytes = dst->fBounds.width();
    744     dst->fFormat = SkMask::kA8_Format;
    745     dst->fImage = NULL;
    746 
    747     int             sw = SkScalarFloorToInt(src.width());
    748     int             sh = SkScalarFloorToInt(src.height());
    749 
    750     if (createMode == SkMask::kJustComputeBounds_CreateMode) {
    751         if (style == kInner_Style) {
    752             dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    753                              SkScalarRoundToInt(src.fTop),
    754                              SkScalarRoundToInt(src.fRight),
    755                              SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    756             dst->fRowBytes = sw;
    757         }
    758         return true;
    759     }
    760     unsigned int *profile = NULL;
    761 
    762     compute_profile(sigma, &profile);
    763     SkAutoTDeleteArray<unsigned int> ada(profile);
    764 
    765     size_t dstSize = dst->computeImageSize();
    766     if (0 == dstSize) {
    767         return false;   // too big to allocate, abort
    768     }
    769 
    770     uint8_t*        dp = SkMask::AllocImage(dstSize);
    771 
    772     dst->fImage = dp;
    773 
    774     int dstHeight = dst->fBounds.height();
    775     int dstWidth = dst->fBounds.width();
    776 
    777     // nearest odd number less than the profile size represents the center
    778     // of the (2x scaled) profile
    779     int center = ( profile_size & ~1 ) - 1;
    780 
    781     int w = sw - center;
    782     int h = sh - center;
    783 
    784     uint8_t *outptr = dp;
    785 
    786     SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
    787 
    788     for (int x = 0 ; x < dstWidth ; ++x) {
    789         if (profile_size <= sw) {
    790             horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w);
    791         } else {
    792             float span = float(sw)/(2*sigma);
    793             float giX = 1.5f - (x+.5f)/(2*sigma);
    794             horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
    795         }
    796     }
    797 
    798     for (int y = 0 ; y < dstHeight ; ++y) {
    799         unsigned int profile_y;
    800         if (profile_size <= sh) {
    801             profile_y = profile_lookup(profile, y, dstHeight, h);
    802         } else {
    803             float span = float(sh)/(2*sigma);
    804             float giY = 1.5f - (y+.5f)/(2*sigma);
    805             profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span)));
    806         }
    807 
    808         for (int x = 0 ; x < dstWidth ; x++) {
    809             unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y);
    810             *(outptr++) = maskval;
    811         }
    812     }
    813 
    814     if (style == kInner_Style) {
    815         // now we allocate the "real" dst, mirror the size of src
    816         size_t srcSize = (size_t)(src.width() * src.height());
    817         if (0 == srcSize) {
    818             return false;   // too big to allocate, abort
    819         }
    820         dst->fImage = SkMask::AllocImage(srcSize);
    821         for (int y = 0 ; y < sh ; y++) {
    822             uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
    823             uint8_t *inner_scanline = dst->fImage + y*sw;
    824             memcpy(inner_scanline, blur_scanline, sw);
    825         }
    826         SkMask::FreeImage(dp);
    827 
    828         dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    829                          SkScalarRoundToInt(src.fTop),
    830                          SkScalarRoundToInt(src.fRight),
    831                          SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    832         dst->fRowBytes = sw;
    833 
    834     } else if (style == kOuter_Style) {
    835         for (int y = pad ; y < dstHeight-pad ; y++) {
    836             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    837             memset(dst_scanline, 0, sw);
    838         }
    839     } else if (style == kSolid_Style) {
    840         for (int y = pad ; y < dstHeight-pad ; y++) {
    841             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    842             memset(dst_scanline, 0xff, sw);
    843         }
    844     }
    845     // normal and solid styles are the same for analytic rect blurs, so don't
    846     // need to handle solid specially.
    847 
    848     return true;
    849 }
    850 
    851 bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar radius,
    852                                  Style style, SkIPoint* margin) {
    853     return BlurGroundTruth(ConvertRadiusToSigma(radius), dst, src, style, margin);
    854 }
    855 // The "simple" blur is a direct implementation of separable convolution with a discrete
    856 // gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
    857 // useful for correctness comparisons.
    858 
    859 bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src,
    860                                  Style style, SkIPoint* margin) {
    861 
    862     if (src.fFormat != SkMask::kA8_Format) {
    863         return false;
    864     }
    865 
    866     float variance = sigma * sigma;
    867 
    868     int windowSize = SkScalarCeil(sigma*6);
    869     // round window size up to nearest odd number
    870     windowSize |= 1;
    871 
    872     SkAutoTMalloc<float> gaussWindow(windowSize);
    873 
    874     int halfWindow = windowSize >> 1;
    875 
    876     gaussWindow[halfWindow] = 1;
    877 
    878     float windowSum = 1;
    879     for (int x = 1 ; x <= halfWindow ; ++x) {
    880         float gaussian = expf(-x*x / (2*variance));
    881         gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
    882         windowSum += 2*gaussian;
    883     }
    884 
    885     // leave the filter un-normalized for now; we will divide by the normalization
    886     // sum later;
    887 
    888     int pad = halfWindow;
    889     if (margin) {
    890         margin->set( pad, pad );
    891     }
    892 
    893     dst->fBounds = src.fBounds;
    894     dst->fBounds.outset(pad, pad);
    895 
    896     dst->fRowBytes = dst->fBounds.width();
    897     dst->fFormat = SkMask::kA8_Format;
    898     dst->fImage = NULL;
    899 
    900     if (src.fImage) {
    901 
    902         size_t dstSize = dst->computeImageSize();
    903         if (0 == dstSize) {
    904             return false;   // too big to allocate, abort
    905         }
    906 
    907         int             srcWidth = src.fBounds.width();
    908         int             srcHeight = src.fBounds.height();
    909         int             dstWidth = dst->fBounds.width();
    910 
    911         const uint8_t*  srcPixels = src.fImage;
    912         uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
    913         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
    914 
    915         // do the actual blur.  First, make a padded copy of the source.
    916         // use double pad so we never have to check if we're outside anything
    917 
    918         int padWidth = srcWidth + 4*pad;
    919         int padHeight = srcHeight;
    920         int padSize = padWidth * padHeight;
    921 
    922         SkAutoTMalloc<uint8_t> padPixels(padSize);
    923         memset(padPixels, 0, padSize);
    924 
    925         for (int y = 0 ; y < srcHeight; ++y) {
    926             uint8_t* padptr = padPixels + y * padWidth + 2*pad;
    927             const uint8_t* srcptr = srcPixels + y * srcWidth;
    928             memcpy(padptr, srcptr, srcWidth);
    929         }
    930 
    931         // blur in X, transposing the result into a temporary floating point buffer.
    932         // also double-pad the intermediate result so that the second blur doesn't
    933         // have to do extra conditionals.
    934 
    935         int tmpWidth = padHeight + 4*pad;
    936         int tmpHeight = padWidth - 2*pad;
    937         int tmpSize = tmpWidth * tmpHeight;
    938 
    939         SkAutoTMalloc<float> tmpImage(tmpSize);
    940         memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
    941 
    942         for (int y = 0 ; y < padHeight ; ++y) {
    943             uint8_t *srcScanline = padPixels + y*padWidth;
    944             for (int x = pad ; x < padWidth - pad ; ++x) {
    945                 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
    946                 uint8_t *windowCenter = srcScanline + x;
    947                 for (int i = -pad ; i <= pad ; ++i) {
    948                     *outPixel += gaussWindow[pad+i]*windowCenter[i];
    949                 }
    950                 *outPixel /= windowSum;
    951             }
    952         }
    953 
    954         // blur in Y; now filling in the actual desired destination.  We have to do
    955         // the transpose again; these transposes guarantee that we read memory in
    956         // linear order.
    957 
    958         for (int y = 0 ; y < tmpHeight ; ++y) {
    959             float *srcScanline = tmpImage + y*tmpWidth;
    960             for (int x = pad ; x < tmpWidth - pad ; ++x) {
    961                 float *windowCenter = srcScanline + x;
    962                 float finalValue = 0;
    963                 for (int i = -pad ; i <= pad ; ++i) {
    964                     finalValue += gaussWindow[pad+i]*windowCenter[i];
    965                 }
    966                 finalValue /= windowSum;
    967                 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
    968                 int integerPixel = int(finalValue + 0.5f);
    969                 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
    970             }
    971         }
    972 
    973         dst->fImage = dstPixels;
    974         // if need be, alloc the "real" dst (same size as src) and copy/merge
    975         // the blur into it (applying the src)
    976         if (style == kInner_Style) {
    977             // now we allocate the "real" dst, mirror the size of src
    978             size_t srcSize = src.computeImageSize();
    979             if (0 == srcSize) {
    980                 return false;   // too big to allocate, abort
    981             }
    982             dst->fImage = SkMask::AllocImage(srcSize);
    983             merge_src_with_blur(dst->fImage, src.fRowBytes,
    984                 srcPixels, src.fRowBytes,
    985                 dstPixels + pad*dst->fRowBytes + pad,
    986                 dst->fRowBytes, srcWidth, srcHeight);
    987             SkMask::FreeImage(dstPixels);
    988         } else if (style != kNormal_Style) {
    989             clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
    990                 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
    991         }
    992         (void)autoCall.detach();
    993     }
    994 
    995     if (style == kInner_Style) {
    996         dst->fBounds = src.fBounds; // restore trimmed bounds
    997         dst->fRowBytes = src.fRowBytes;
    998     }
    999 
   1000     return true;
   1001 }
   1002