Home | History | Annotate | Download | only in effects
      1 
      2 /*
      3  * Copyright 2006 The Android Open Source Project
      4  *
      5  * Use of this source code is governed by a BSD-style license that can be
      6  * found in the LICENSE file.
      7  */
      8 
      9 
     10 #include "SkBlurMask.h"
     11 #include "SkMath.h"
     12 #include "SkTemplates.h"
     13 #include "SkEndian.h"
     14 
     15 const SkScalar SkBlurMask::kBlurRadiusFudgeFactor = SkFloatToScalar(.57735f);
     16 
     17 #define UNROLL_SEPARABLE_LOOPS
     18 
     19 /**
     20  * This function performs a box blur in X, of the given radius.  If the
     21  * "transpose" parameter is true, it will transpose the pixels on write,
     22  * such that X and Y are swapped. Reads are always performed from contiguous
     23  * memory in X, for speed. The destination buffer (dst) must be at least
     24  * (width + leftRadius + rightRadius) * height bytes in size.
     25  *
     26  * This is what the inner loop looks like before unrolling, and with the two
     27  * cases broken out separately (width < diameter, width >= diameter):
     28  *
     29  *      if (width < diameter) {
     30  *          for (int x = 0; x < width; ++x) {
     31  *              sum += *right++;
     32  *              *dptr = (sum * scale + half) >> 24;
     33  *              dptr += dst_x_stride;
     34  *          }
     35  *          for (int x = width; x < diameter; ++x) {
     36  *              *dptr = (sum * scale + half) >> 24;
     37  *              dptr += dst_x_stride;
     38  *          }
     39  *          for (int x = 0; x < width; ++x) {
     40  *              *dptr = (sum * scale + half) >> 24;
     41  *              sum -= *left++;
     42  *              dptr += dst_x_stride;
     43  *          }
     44  *      } else {
     45  *          for (int x = 0; x < diameter; ++x) {
     46  *              sum += *right++;
     47  *              *dptr = (sum * scale + half) >> 24;
     48  *              dptr += dst_x_stride;
     49  *          }
     50  *          for (int x = diameter; x < width; ++x) {
     51  *              sum += *right++;
     52  *              *dptr = (sum * scale + half) >> 24;
     53  *              sum -= *left++;
     54  *              dptr += dst_x_stride;
     55  *          }
     56  *          for (int x = 0; x < diameter; ++x) {
     57  *              *dptr = (sum * scale + half) >> 24;
     58  *              sum -= *left++;
     59  *              dptr += dst_x_stride;
     60  *          }
     61  *      }
     62  */
     63 static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
     64                    int leftRadius, int rightRadius, int width, int height,
     65                    bool transpose)
     66 {
     67     int diameter = leftRadius + rightRadius;
     68     int kernelSize = diameter + 1;
     69     int border = SkMin32(width, diameter);
     70     uint32_t scale = (1 << 24) / kernelSize;
     71     int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
     72     int dst_x_stride = transpose ? height : 1;
     73     int dst_y_stride = transpose ? 1 : new_width;
     74 #ifndef SK_DISABLE_BLUR_ROUNDING
     75     uint32_t half = 1 << 23;
     76 #else
     77     uint32_t half = 0;
     78 #endif
     79     for (int y = 0; y < height; ++y) {
     80         uint32_t sum = 0;
     81         uint8_t* dptr = dst + y * dst_y_stride;
     82         const uint8_t* right = src + y * src_y_stride;
     83         const uint8_t* left = right;
     84         for (int x = 0; x < rightRadius - leftRadius; x++) {
     85             *dptr = 0;
     86             dptr += dst_x_stride;
     87         }
     88 #define LEFT_BORDER_ITER \
     89             sum += *right++; \
     90             *dptr = (sum * scale + half) >> 24; \
     91             dptr += dst_x_stride;
     92 
     93         int x = 0;
     94 #ifdef UNROLL_SEPARABLE_LOOPS
     95         for (; x < border - 16; x += 16) {
     96             LEFT_BORDER_ITER
     97             LEFT_BORDER_ITER
     98             LEFT_BORDER_ITER
     99             LEFT_BORDER_ITER
    100             LEFT_BORDER_ITER
    101             LEFT_BORDER_ITER
    102             LEFT_BORDER_ITER
    103             LEFT_BORDER_ITER
    104             LEFT_BORDER_ITER
    105             LEFT_BORDER_ITER
    106             LEFT_BORDER_ITER
    107             LEFT_BORDER_ITER
    108             LEFT_BORDER_ITER
    109             LEFT_BORDER_ITER
    110             LEFT_BORDER_ITER
    111             LEFT_BORDER_ITER
    112         }
    113 #endif
    114         for (; x < border; ++x) {
    115             LEFT_BORDER_ITER
    116         }
    117 #undef LEFT_BORDER_ITER
    118 #define TRIVIAL_ITER \
    119             *dptr = (sum * scale + half) >> 24; \
    120             dptr += dst_x_stride;
    121         x = width;
    122 #ifdef UNROLL_SEPARABLE_LOOPS
    123         for (; x < diameter - 16; x += 16) {
    124             TRIVIAL_ITER
    125             TRIVIAL_ITER
    126             TRIVIAL_ITER
    127             TRIVIAL_ITER
    128             TRIVIAL_ITER
    129             TRIVIAL_ITER
    130             TRIVIAL_ITER
    131             TRIVIAL_ITER
    132             TRIVIAL_ITER
    133             TRIVIAL_ITER
    134             TRIVIAL_ITER
    135             TRIVIAL_ITER
    136             TRIVIAL_ITER
    137             TRIVIAL_ITER
    138             TRIVIAL_ITER
    139             TRIVIAL_ITER
    140         }
    141 #endif
    142         for (; x < diameter; ++x) {
    143             TRIVIAL_ITER
    144         }
    145 #undef TRIVIAL_ITER
    146 #define CENTER_ITER \
    147             sum += *right++; \
    148             *dptr = (sum * scale + half) >> 24; \
    149             sum -= *left++; \
    150             dptr += dst_x_stride;
    151 
    152         x = diameter;
    153 #ifdef UNROLL_SEPARABLE_LOOPS
    154         for (; x < width - 16; x += 16) {
    155             CENTER_ITER
    156             CENTER_ITER
    157             CENTER_ITER
    158             CENTER_ITER
    159             CENTER_ITER
    160             CENTER_ITER
    161             CENTER_ITER
    162             CENTER_ITER
    163             CENTER_ITER
    164             CENTER_ITER
    165             CENTER_ITER
    166             CENTER_ITER
    167             CENTER_ITER
    168             CENTER_ITER
    169             CENTER_ITER
    170             CENTER_ITER
    171         }
    172 #endif
    173         for (; x < width; ++x) {
    174             CENTER_ITER
    175         }
    176 #undef CENTER_ITER
    177 #define RIGHT_BORDER_ITER \
    178             *dptr = (sum * scale + half) >> 24; \
    179             sum -= *left++; \
    180             dptr += dst_x_stride;
    181 
    182         x = 0;
    183 #ifdef UNROLL_SEPARABLE_LOOPS
    184         for (; x < border - 16; x += 16) {
    185             RIGHT_BORDER_ITER
    186             RIGHT_BORDER_ITER
    187             RIGHT_BORDER_ITER
    188             RIGHT_BORDER_ITER
    189             RIGHT_BORDER_ITER
    190             RIGHT_BORDER_ITER
    191             RIGHT_BORDER_ITER
    192             RIGHT_BORDER_ITER
    193             RIGHT_BORDER_ITER
    194             RIGHT_BORDER_ITER
    195             RIGHT_BORDER_ITER
    196             RIGHT_BORDER_ITER
    197             RIGHT_BORDER_ITER
    198             RIGHT_BORDER_ITER
    199             RIGHT_BORDER_ITER
    200             RIGHT_BORDER_ITER
    201         }
    202 #endif
    203         for (; x < border; ++x) {
    204             RIGHT_BORDER_ITER
    205         }
    206 #undef RIGHT_BORDER_ITER
    207         for (int x = 0; x < leftRadius - rightRadius; ++x) {
    208             *dptr = 0;
    209             dptr += dst_x_stride;
    210         }
    211         SkASSERT(sum == 0);
    212     }
    213     return new_width;
    214 }
    215 
    216 /**
    217  * This variant of the box blur handles blurring of non-integer radii.  It
    218  * keeps two running sums: an outer sum for the rounded-up kernel radius, and
    219  * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
    220  * interpolates between them.  In float this would be:
    221  *  outer_weight * outer_sum / kernelSize +
    222  *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
    223  *
    224  * This is what the inner loop looks like before unrolling, and with the two
    225  * cases broken out separately (width < diameter, width >= diameter):
    226  *
    227  *      if (width < diameter) {
    228  *          for (int x = 0; x < width; x++) {
    229  *              inner_sum = outer_sum;
    230  *              outer_sum += *right++;
    231  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    232  *              dptr += dst_x_stride;
    233  *          }
    234  *          for (int x = width; x < diameter; ++x) {
    235  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    236  *              dptr += dst_x_stride;
    237  *          }
    238  *          for (int x = 0; x < width; x++) {
    239  *              inner_sum = outer_sum - *left++;
    240  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    241  *              dptr += dst_x_stride;
    242  *              outer_sum = inner_sum;
    243  *          }
    244  *      } else {
    245  *          for (int x = 0; x < diameter; x++) {
    246  *              inner_sum = outer_sum;
    247  *              outer_sum += *right++;
    248  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    249  *              dptr += dst_x_stride;
    250  *          }
    251  *          for (int x = diameter; x < width; ++x) {
    252  *              inner_sum = outer_sum - *left;
    253  *              outer_sum += *right++;
    254  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    255  *              dptr += dst_x_stride;
    256  *              outer_sum -= *left++;
    257  *          }
    258  *          for (int x = 0; x < diameter; x++) {
    259  *              inner_sum = outer_sum - *left++;
    260  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    261  *              dptr += dst_x_stride;
    262  *              outer_sum = inner_sum;
    263  *          }
    264  *      }
    265  *  }
    266  *  return new_width;
    267  */
    268 
    269 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
    270                          int radius, int width, int height,
    271                          bool transpose, uint8_t outer_weight)
    272 {
    273     int diameter = radius * 2;
    274     int kernelSize = diameter + 1;
    275     int border = SkMin32(width, diameter);
    276     int inner_weight = 255 - outer_weight;
    277     outer_weight += outer_weight >> 7;
    278     inner_weight += inner_weight >> 7;
    279     uint32_t outer_scale = (outer_weight << 16) / kernelSize;
    280     uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
    281 #ifndef SK_DISABLE_BLUR_ROUNDING
    282     uint32_t half = 1 << 23;
    283 #else
    284     uint32_t half = 0;
    285 #endif
    286     int new_width = width + diameter;
    287     int dst_x_stride = transpose ? height : 1;
    288     int dst_y_stride = transpose ? 1 : new_width;
    289     for (int y = 0; y < height; ++y) {
    290         uint32_t outer_sum = 0, inner_sum = 0;
    291         uint8_t* dptr = dst + y * dst_y_stride;
    292         const uint8_t* right = src + y * src_y_stride;
    293         const uint8_t* left = right;
    294         int x = 0;
    295 
    296 #define LEFT_BORDER_ITER \
    297             inner_sum = outer_sum; \
    298             outer_sum += *right++; \
    299             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    300             dptr += dst_x_stride;
    301 
    302 #ifdef UNROLL_SEPARABLE_LOOPS
    303         for (;x < border - 16; x += 16) {
    304             LEFT_BORDER_ITER
    305             LEFT_BORDER_ITER
    306             LEFT_BORDER_ITER
    307             LEFT_BORDER_ITER
    308             LEFT_BORDER_ITER
    309             LEFT_BORDER_ITER
    310             LEFT_BORDER_ITER
    311             LEFT_BORDER_ITER
    312             LEFT_BORDER_ITER
    313             LEFT_BORDER_ITER
    314             LEFT_BORDER_ITER
    315             LEFT_BORDER_ITER
    316             LEFT_BORDER_ITER
    317             LEFT_BORDER_ITER
    318             LEFT_BORDER_ITER
    319             LEFT_BORDER_ITER
    320         }
    321 #endif
    322 
    323         for (;x < border; ++x) {
    324             LEFT_BORDER_ITER
    325         }
    326 #undef LEFT_BORDER_ITER
    327         for (int x = width; x < diameter; ++x) {
    328             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    329             dptr += dst_x_stride;
    330         }
    331         x = diameter;
    332 
    333 #define CENTER_ITER \
    334             inner_sum = outer_sum - *left; \
    335             outer_sum += *right++; \
    336             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    337             dptr += dst_x_stride; \
    338             outer_sum -= *left++;
    339 
    340 #ifdef UNROLL_SEPARABLE_LOOPS
    341         for (; x < width - 16; x += 16) {
    342             CENTER_ITER
    343             CENTER_ITER
    344             CENTER_ITER
    345             CENTER_ITER
    346             CENTER_ITER
    347             CENTER_ITER
    348             CENTER_ITER
    349             CENTER_ITER
    350             CENTER_ITER
    351             CENTER_ITER
    352             CENTER_ITER
    353             CENTER_ITER
    354             CENTER_ITER
    355             CENTER_ITER
    356             CENTER_ITER
    357             CENTER_ITER
    358         }
    359 #endif
    360         for (; x < width; ++x) {
    361             CENTER_ITER
    362         }
    363 #undef CENTER_ITER
    364 
    365         #define RIGHT_BORDER_ITER \
    366             inner_sum = outer_sum - *left++; \
    367             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    368             dptr += dst_x_stride; \
    369             outer_sum = inner_sum;
    370 
    371         x = 0;
    372 #ifdef UNROLL_SEPARABLE_LOOPS
    373         for (; x < border - 16; x += 16) {
    374             RIGHT_BORDER_ITER
    375             RIGHT_BORDER_ITER
    376             RIGHT_BORDER_ITER
    377             RIGHT_BORDER_ITER
    378             RIGHT_BORDER_ITER
    379             RIGHT_BORDER_ITER
    380             RIGHT_BORDER_ITER
    381             RIGHT_BORDER_ITER
    382             RIGHT_BORDER_ITER
    383             RIGHT_BORDER_ITER
    384             RIGHT_BORDER_ITER
    385             RIGHT_BORDER_ITER
    386             RIGHT_BORDER_ITER
    387             RIGHT_BORDER_ITER
    388             RIGHT_BORDER_ITER
    389             RIGHT_BORDER_ITER
    390         }
    391 #endif
    392         for (; x < border; ++x) {
    393             RIGHT_BORDER_ITER
    394         }
    395 #undef RIGHT_BORDER_ITER
    396         SkASSERT(outer_sum == 0 && inner_sum == 0);
    397     }
    398     return new_width;
    399 }
    400 
    401 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
    402 {
    403     *loRadius = *hiRadius = SkScalarCeil(passRadius);
    404     if (SkIntToScalar(*hiRadius) - passRadius > SkFloatToScalar(0.5f)) {
    405         *loRadius = *hiRadius - 1;
    406     }
    407 }
    408 
    409 #include "SkColorPriv.h"
    410 
    411 static void merge_src_with_blur(uint8_t dst[], int dstRB,
    412                                 const uint8_t src[], int srcRB,
    413                                 const uint8_t blur[], int blurRB,
    414                                 int sw, int sh) {
    415     dstRB -= sw;
    416     srcRB -= sw;
    417     blurRB -= sw;
    418     while (--sh >= 0) {
    419         for (int x = sw - 1; x >= 0; --x) {
    420             *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
    421             dst += 1;
    422             src += 1;
    423             blur += 1;
    424         }
    425         dst += dstRB;
    426         src += srcRB;
    427         blur += blurRB;
    428     }
    429 }
    430 
    431 static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
    432                             const uint8_t src[], int srcRowBytes,
    433                             int sw, int sh,
    434                             SkBlurMask::Style style) {
    435     int x;
    436     while (--sh >= 0) {
    437         switch (style) {
    438         case SkBlurMask::kSolid_Style:
    439             for (x = sw - 1; x >= 0; --x) {
    440                 int s = *src;
    441                 int d = *dst;
    442                 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
    443                 dst += 1;
    444                 src += 1;
    445             }
    446             break;
    447         case SkBlurMask::kOuter_Style:
    448             for (x = sw - 1; x >= 0; --x) {
    449                 if (*src) {
    450                     *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
    451                 }
    452                 dst += 1;
    453                 src += 1;
    454             }
    455             break;
    456         default:
    457             SkDEBUGFAIL("Unexpected blur style here");
    458             break;
    459         }
    460         dst += dstRowBytes - sw;
    461         src += srcRowBytes - sw;
    462     }
    463 }
    464 
    465 ///////////////////////////////////////////////////////////////////////////////
    466 
    467 // we use a local function to wrap the class static method to work around
    468 // a bug in gcc98
    469 void SkMask_FreeImage(uint8_t* image);
    470 void SkMask_FreeImage(uint8_t* image) {
    471     SkMask::FreeImage(image);
    472 }
    473 
    474 bool SkBlurMask::Blur(SkMask* dst, const SkMask& src,
    475                       SkScalar radius, Style style, Quality quality,
    476                       SkIPoint* margin)
    477 {
    478 
    479     if (src.fFormat != SkMask::kA8_Format) {
    480         return false;
    481     }
    482 
    483     // Force high quality off for small radii (performance)
    484     if (radius < SkIntToScalar(3)) {
    485         quality = kLow_Quality;
    486     }
    487 
    488     // highQuality: use three box blur passes as a cheap way
    489     // to approximate a Gaussian blur
    490     int passCount = (kHigh_Quality == quality) ? 3 : 1;
    491     SkScalar passRadius = (kHigh_Quality == quality) ?
    492                           SkScalarMul( radius, kBlurRadiusFudgeFactor):
    493                           radius;
    494 
    495     int rx = SkScalarCeil(passRadius);
    496     int outerWeight = 255 - SkScalarRound((SkIntToScalar(rx) - passRadius) * 255);
    497 
    498     SkASSERT(rx >= 0);
    499     SkASSERT((unsigned)outerWeight <= 255);
    500     if (rx <= 0) {
    501         return false;
    502     }
    503 
    504     int ry = rx;    // only do square blur for now
    505 
    506     int padx = passCount * rx;
    507     int pady = passCount * ry;
    508 
    509     if (margin) {
    510         margin->set(padx, pady);
    511     }
    512     dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
    513         src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    514 
    515     dst->fRowBytes = dst->fBounds.width();
    516     dst->fFormat = SkMask::kA8_Format;
    517     dst->fImage = NULL;
    518 
    519     if (src.fImage) {
    520         size_t dstSize = dst->computeImageSize();
    521         if (0 == dstSize) {
    522             return false;   // too big to allocate, abort
    523         }
    524 
    525         int             sw = src.fBounds.width();
    526         int             sh = src.fBounds.height();
    527         const uint8_t*  sp = src.fImage;
    528         uint8_t*        dp = SkMask::AllocImage(dstSize);
    529         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
    530 
    531         // build the blurry destination
    532         SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
    533         uint8_t*                tp = tmpBuffer.get();
    534         int w = sw, h = sh;
    535 
    536         if (outerWeight == 255) {
    537             int loRadius, hiRadius;
    538             get_adjusted_radii(passRadius, &loRadius, &hiRadius);
    539             if (kHigh_Quality == quality) {
    540                 // Do three X blurs, with a transpose on the final one.
    541                 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
    542                 w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
    543                 w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
    544                 // Do three Y blurs, with a transpose on the final one.
    545                 h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
    546                 h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
    547                 h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
    548             } else {
    549                 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
    550                 h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
    551             }
    552         } else {
    553             if (kHigh_Quality == quality) {
    554                 // Do three X blurs, with a transpose on the final one.
    555                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
    556                 w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
    557                 w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
    558                 // Do three Y blurs, with a transpose on the final one.
    559                 h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
    560                 h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
    561                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    562             } else {
    563                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
    564                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    565             }
    566         }
    567 
    568         dst->fImage = dp;
    569         // if need be, alloc the "real" dst (same size as src) and copy/merge
    570         // the blur into it (applying the src)
    571         if (style == kInner_Style) {
    572             // now we allocate the "real" dst, mirror the size of src
    573             size_t srcSize = src.computeImageSize();
    574             if (0 == srcSize) {
    575                 return false;   // too big to allocate, abort
    576             }
    577             dst->fImage = SkMask::AllocImage(srcSize);
    578             merge_src_with_blur(dst->fImage, src.fRowBytes,
    579                                 sp, src.fRowBytes,
    580                                 dp + passCount * (rx + ry * dst->fRowBytes),
    581                                 dst->fRowBytes, sw, sh);
    582             SkMask::FreeImage(dp);
    583         } else if (style != kNormal_Style) {
    584             clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
    585                             dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
    586         }
    587         (void)autoCall.detach();
    588     }
    589 
    590     if (style == kInner_Style) {
    591         dst->fBounds = src.fBounds; // restore trimmed bounds
    592         dst->fRowBytes = src.fRowBytes;
    593     }
    594 
    595     return true;
    596 }
    597 
    598 /* Convolving a box with itself three times results in a piecewise
    599    quadratic function:
    600 
    601    0                              x <= -1.5
    602    9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
    603    3/4 - x^2                -.5 < x <= .5
    604    9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
    605    0                        1.5 < x
    606 
    607    Mathematica:
    608 
    609    g[x_] := Piecewise [ {
    610      {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
    611      {3/4 - x^2             ,   -.5 < x <= .5},
    612      {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
    613    }, 0]
    614 
    615    To get the profile curve of the blurred step function at the rectangle
    616    edge, we evaluate the indefinite integral, which is piecewise cubic:
    617 
    618    0                                        x <= -1.5
    619    9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
    620    1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
    621    7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
    622    1                                  1.5 < x
    623 
    624    in Mathematica code:
    625 
    626    gi[x_] := Piecewise[ {
    627      { 0 , x <= -1.5 },
    628      { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
    629      { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
    630      { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
    631    },1]
    632 */
    633 
    634 static float gaussianIntegral(float x) {
    635     if (x > 1.5f) {
    636         return 0.0f;
    637     }
    638     if (x < -1.5f) {
    639         return 1.0f;
    640     }
    641 
    642     float x2 = x*x;
    643     float x3 = x2*x;
    644 
    645     if ( x > 0.5f ) {
    646         return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x);
    647     }
    648     if ( x > -0.5f ) {
    649         return 0.5f - (0.75f * x - x3 / 3.0f);
    650     }
    651     return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x);
    652 }
    653 
    654 // Compute the size of the array allocated for the profile.
    655 
    656 static int compute_profile_size(SkScalar radius) {
    657     return SkScalarRoundToInt(radius * 3);
    658 
    659 }
    660 
    661 /*  compute_profile allocates and fills in an array of floating
    662     point values between 0 and 255 for the profile signature of
    663     a blurred half-plane with the given blur radius.  Since we're
    664     going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
    665     all the time, we actually fill in the profile pre-inverted
    666     (already done 255-x).
    667 
    668     It's the responsibility of the caller to delete the
    669     memory returned in profile_out.
    670 */
    671 
    672 static void compute_profile(SkScalar radius, unsigned int **profile_out) {
    673     int size = compute_profile_size(radius);
    674 
    675     int center = size >> 1;
    676     unsigned int *profile = SkNEW_ARRAY(unsigned int, size);
    677 
    678     float invr = 1.f/radius;
    679 
    680     profile[0] = 255;
    681     for (int x = 1 ; x < size ; ++x) {
    682         float scaled_x = (center - x - .5f) * invr;
    683         float gi = gaussianIntegral(scaled_x);
    684         profile[x] = 255 - (uint8_t) (255.f * gi);
    685     }
    686 
    687     *profile_out = profile;
    688 }
    689 
    690 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for
    691 // commonly used radii.  Consider baking some of the most common blur radii
    692 // directly in as static data?
    693 
    694 // Implementation adapted from Michael Herf's approach:
    695 // http://stereopsis.com/shadowrect/
    696 
    697 static inline unsigned int profile_lookup( unsigned int *profile, int loc, int blurred_width, int sharp_width ) {
    698     int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
    699     int ox = dx >> 1;
    700     if (ox < 0) {
    701         ox = 0;
    702     }
    703 
    704     return profile[ox];
    705 }
    706 
    707 bool SkBlurMask::BlurRect(SkMask *dst, const SkRect &src,
    708                           SkScalar provided_radius, Style style,
    709                           SkIPoint *margin, SkMask::CreateMode createMode) {
    710     int profile_size;
    711 
    712     float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
    713 
    714     // adjust blur radius to match interpretation from boxfilter code
    715     radius = (radius + .5f) * 2.f;
    716 
    717     profile_size = compute_profile_size(radius);
    718 
    719     int pad = profile_size/2;
    720     if (margin) {
    721         margin->set( pad, pad );
    722     }
    723 
    724     dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
    725                      SkScalarRoundToInt(src.fTop - pad),
    726                      SkScalarRoundToInt(src.fRight + pad),
    727                      SkScalarRoundToInt(src.fBottom + pad));
    728 
    729     dst->fRowBytes = dst->fBounds.width();
    730     dst->fFormat = SkMask::kA8_Format;
    731     dst->fImage = NULL;
    732 
    733     int             sw = SkScalarFloorToInt(src.width());
    734     int             sh = SkScalarFloorToInt(src.height());
    735 
    736     if (createMode == SkMask::kJustComputeBounds_CreateMode) {
    737         if (style == kInner_Style) {
    738             dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    739                              SkScalarRoundToInt(src.fTop),
    740                              SkScalarRoundToInt(src.fRight),
    741                              SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    742             dst->fRowBytes = sw;
    743         }
    744         return true;
    745     }
    746     unsigned int *profile = NULL;
    747 
    748     compute_profile(radius, &profile);
    749     SkAutoTDeleteArray<unsigned int> ada(profile);
    750 
    751     size_t dstSize = dst->computeImageSize();
    752     if (0 == dstSize) {
    753         return false;   // too big to allocate, abort
    754     }
    755 
    756     uint8_t*        dp = SkMask::AllocImage(dstSize);
    757 
    758     dst->fImage = dp;
    759 
    760     int dstHeight = dst->fBounds.height();
    761     int dstWidth = dst->fBounds.width();
    762 
    763     // nearest odd number less than the profile size represents the center
    764     // of the (2x scaled) profile
    765     int center = ( profile_size & ~1 ) - 1;
    766 
    767     int w = sw - center;
    768     int h = sh - center;
    769 
    770     uint8_t *outptr = dp;
    771 
    772     SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
    773 
    774     for (int x = 0 ; x < dstWidth ; ++x) {
    775         if (profile_size <= sw) {
    776             horizontalScanline[x] = profile_lookup(profile, x, dstWidth, w);
    777         } else {
    778             float span = float(sw)/radius;
    779             float giX = 1.5f - (x+.5f)/radius;
    780             horizontalScanline[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
    781         }
    782     }
    783 
    784     for (int y = 0 ; y < dstHeight ; ++y) {
    785         unsigned int profile_y;
    786         if (profile_size <= sh) {
    787             profile_y = profile_lookup(profile, y, dstHeight, h);
    788         } else {
    789             float span = float(sh)/radius;
    790             float giY = 1.5f - (y+.5f)/radius;
    791             profile_y = (uint8_t) (255 * (gaussianIntegral(giY) - gaussianIntegral(giY + span)));
    792         }
    793 
    794         for (int x = 0 ; x < dstWidth ; x++) {
    795             unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], profile_y);
    796             *(outptr++) = maskval;
    797         }
    798     }
    799 
    800     if (style == kInner_Style) {
    801         // now we allocate the "real" dst, mirror the size of src
    802         size_t srcSize = (size_t)(src.width() * src.height());
    803         if (0 == srcSize) {
    804             return false;   // too big to allocate, abort
    805         }
    806         dst->fImage = SkMask::AllocImage(srcSize);
    807         for (int y = 0 ; y < sh ; y++) {
    808             uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
    809             uint8_t *inner_scanline = dst->fImage + y*sw;
    810             memcpy(inner_scanline, blur_scanline, sw);
    811         }
    812         SkMask::FreeImage(dp);
    813 
    814         dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    815                          SkScalarRoundToInt(src.fTop),
    816                          SkScalarRoundToInt(src.fRight),
    817                          SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    818         dst->fRowBytes = sw;
    819 
    820     } else if (style == kOuter_Style) {
    821         for (int y = pad ; y < dstHeight-pad ; y++) {
    822             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    823             memset(dst_scanline, 0, sw);
    824         }
    825     } else if (style == kSolid_Style) {
    826         for (int y = pad ; y < dstHeight-pad ; y++) {
    827             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    828             memset(dst_scanline, 0xff, sw);
    829         }
    830     }
    831     // normal and solid styles are the same for analytic rect blurs, so don't
    832     // need to handle solid specially.
    833 
    834     return true;
    835 }
    836 
    837 // The "simple" blur is a direct implementation of separable convolution with a discrete
    838 // gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
    839 // useful for correctness comparisons.
    840 
    841 bool SkBlurMask::BlurGroundTruth(SkMask* dst, const SkMask& src, SkScalar provided_radius,
    842                             Style style, SkIPoint* margin) {
    843 
    844     if (src.fFormat != SkMask::kA8_Format) {
    845         return false;
    846     }
    847 
    848     float radius = SkScalarToFloat(SkScalarMul(provided_radius, kBlurRadiusFudgeFactor));
    849     float stddev = SkScalarToFloat(radius) /2.0f;
    850     float variance = stddev * stddev;
    851 
    852     int windowSize = SkScalarCeil(stddev*4);
    853     // round window size up to nearest odd number
    854     windowSize |= 1;
    855 
    856     SkAutoTMalloc<float> gaussWindow(windowSize);
    857 
    858     int halfWindow = windowSize >> 1;
    859 
    860     gaussWindow[halfWindow] = 1;
    861 
    862     float windowSum = 1;
    863     for (int x = 1 ; x <= halfWindow ; ++x) {
    864         float gaussian = expf(-x*x / variance);
    865         gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
    866         windowSum += 2*gaussian;
    867     }
    868 
    869     // leave the filter un-normalized for now; we will divide by the normalization
    870     // sum later;
    871 
    872     int pad = halfWindow;
    873     if (margin) {
    874         margin->set( pad, pad );
    875     }
    876 
    877     dst->fBounds = src.fBounds;
    878     dst->fBounds.outset(pad, pad);
    879 
    880     dst->fRowBytes = dst->fBounds.width();
    881     dst->fFormat = SkMask::kA8_Format;
    882     dst->fImage = NULL;
    883 
    884     if (src.fImage) {
    885 
    886         size_t dstSize = dst->computeImageSize();
    887         if (0 == dstSize) {
    888             return false;   // too big to allocate, abort
    889         }
    890 
    891         int             srcWidth = src.fBounds.width();
    892         int             srcHeight = src.fBounds.height();
    893         int             dstWidth = dst->fBounds.width();
    894 
    895         const uint8_t*  srcPixels = src.fImage;
    896         uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
    897         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
    898 
    899         // do the actual blur.  First, make a padded copy of the source.
    900         // use double pad so we never have to check if we're outside anything
    901 
    902         int padWidth = srcWidth + 4*pad;
    903         int padHeight = srcHeight;
    904         int padSize = padWidth * padHeight;
    905 
    906         SkAutoTMalloc<uint8_t> padPixels(padSize);
    907         memset(padPixels, 0, padSize);
    908 
    909         for (int y = 0 ; y < srcHeight; ++y) {
    910             uint8_t* padptr = padPixels + y * padWidth + 2*pad;
    911             const uint8_t* srcptr = srcPixels + y * srcWidth;
    912             memcpy(padptr, srcptr, srcWidth);
    913         }
    914 
    915         // blur in X, transposing the result into a temporary floating point buffer.
    916         // also double-pad the intermediate result so that the second blur doesn't
    917         // have to do extra conditionals.
    918 
    919         int tmpWidth = padHeight + 4*pad;
    920         int tmpHeight = padWidth - 2*pad;
    921         int tmpSize = tmpWidth * tmpHeight;
    922 
    923         SkAutoTMalloc<float> tmpImage(tmpSize);
    924         memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
    925 
    926         for (int y = 0 ; y < padHeight ; ++y) {
    927             uint8_t *srcScanline = padPixels + y*padWidth;
    928             for (int x = pad ; x < padWidth - pad ; ++x) {
    929                 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
    930                 uint8_t *windowCenter = srcScanline + x;
    931                 for (int i = -pad ; i <= pad ; ++i) {
    932                     *outPixel += gaussWindow[pad+i]*windowCenter[i];
    933                 }
    934                 *outPixel /= windowSum;
    935             }
    936         }
    937 
    938         // blur in Y; now filling in the actual desired destination.  We have to do
    939         // the transpose again; these transposes guarantee that we read memory in
    940         // linear order.
    941 
    942         for (int y = 0 ; y < tmpHeight ; ++y) {
    943             float *srcScanline = tmpImage + y*tmpWidth;
    944             for (int x = pad ; x < tmpWidth - pad ; ++x) {
    945                 float *windowCenter = srcScanline + x;
    946                 float finalValue = 0;
    947                 for (int i = -pad ; i <= pad ; ++i) {
    948                     finalValue += gaussWindow[pad+i]*windowCenter[i];
    949                 }
    950                 finalValue /= windowSum;
    951                 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
    952                 int integerPixel = int(finalValue + 0.5f);
    953                 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
    954             }
    955         }
    956 
    957         dst->fImage = dstPixels;
    958         // if need be, alloc the "real" dst (same size as src) and copy/merge
    959         // the blur into it (applying the src)
    960         if (style == kInner_Style) {
    961             // now we allocate the "real" dst, mirror the size of src
    962             size_t srcSize = src.computeImageSize();
    963             if (0 == srcSize) {
    964                 return false;   // too big to allocate, abort
    965             }
    966             dst->fImage = SkMask::AllocImage(srcSize);
    967             merge_src_with_blur(dst->fImage, src.fRowBytes,
    968                 srcPixels, src.fRowBytes,
    969                 dstPixels + pad*dst->fRowBytes + pad,
    970                 dst->fRowBytes, srcWidth, srcHeight);
    971             SkMask::FreeImage(dstPixels);
    972         } else if (style != kNormal_Style) {
    973             clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
    974                 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
    975         }
    976         (void)autoCall.detach();
    977     }
    978 
    979     if (style == kInner_Style) {
    980         dst->fBounds = src.fBounds; // restore trimmed bounds
    981         dst->fRowBytes = src.fRowBytes;
    982     }
    983 
    984     return true;
    985 }
    986