Home | History | Annotate | Download | only in effects
      1 /*
      2  * Copyright 2006 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 
      9 #include "SkBlurMask.h"
     10 #include "SkMath.h"
     11 #include "SkTemplates.h"
     12 #include "SkEndian.h"
     13 
     14 
// This constant approximates the scaling done in the software path's
// "high quality" mode, in SkBlurMask::Blur() (1 / sqrt(3)).
// IMHO, it actually should be 1:  we blur "less" than we should do
// according to the CSS and canvas specs, simply because Safari does the same.
// Firefox used to do the same too, until 4.0 where they fixed it.  So at some
// point we should probably get rid of these scaling constants and rebaseline
// all the blur tests.
//
// It is the slope of the linear radius <-> sigma mapping implemented by
// SkBlurMask::ConvertRadiusToSigma() and SkBlurMask::ConvertSigmaToRadius().
static const SkScalar kBLUR_SIGMA_SCALE = 0.57735f;
     23 
     24 SkScalar SkBlurMask::ConvertRadiusToSigma(SkScalar radius) {
     25     return radius > 0 ? kBLUR_SIGMA_SCALE * radius + 0.5f : 0.0f;
     26 }
     27 
     28 SkScalar SkBlurMask::ConvertSigmaToRadius(SkScalar sigma) {
     29     return sigma > 0.5f ? (sigma - 0.5f) / kBLUR_SIGMA_SCALE : 0.0f;
     30 }
     31 
     32 #define UNROLL_SEPARABLE_LOOPS
     33 
/**
 * This function performs a box blur in X, of the given radius.  If the
 * "transpose" parameter is true, it will transpose the pixels on write,
 * such that X and Y are swapped. Reads are always performed from contiguous
 * memory in X, for speed.
 *
 * Every source row produces
 *      new_width = width + 2 * max(leftRadius, rightRadius)
 * output pixels, so the destination buffer (dst) must be at least
 * new_width * height bytes in size.  (That equals
 * (width + leftRadius + rightRadius) * height only when both radii are
 * equal; with unequal radii the row is additionally padded with
 * |rightRadius - leftRadius| zero pixels.)
 *
 * @param src           first pixel of the first source row
 * @param src_y_stride  distance, in bytes, between consecutive source rows
 * @param dst           destination buffer (see the size requirement above)
 * @param leftRadius    number of pixels the kernel extends to the left
 * @param rightRadius   number of pixels the kernel extends to the right
 * @param width         number of pixels read from each source row
 * @param height        number of source rows
 * @param transpose     if true, output pixel (x, y) is stored at
 *                      dst[x * height + y] instead of dst[y * new_width + x]
 * @return new_width, the number of pixels written per source row
 *
 * This is what the inner loop looks like before unrolling, and with the two
 * cases broken out separately (width < diameter, width >= diameter):
 *
 *      if (width < diameter) {
 *          for (int x = 0; x < width; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = width; x < diameter; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = 0; x < width; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *      } else {
 *          for (int x = 0; x < diameter; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = diameter; x < width; ++x) {
 *              sum += *right++;
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *          for (int x = 0; x < diameter; ++x) {
 *              *dptr = (sum * scale + half) >> 24;
 *              sum -= *left++;
 *              dptr += dst_x_stride;
 *          }
 *      }
 */
static int boxBlur(const uint8_t* src, int src_y_stride, uint8_t* dst,
                   int leftRadius, int rightRadius, int width, int height,
                   bool transpose)
{
    int diameter = leftRadius + rightRadius;
    int kernelSize = diameter + 1;
    // Length of the ramp-up and ramp-down phases of each row.
    int border = SkMin32(width, diameter);
    // 8.24 fixed-point reciprocal of the kernel size; "half" below rounds the
    // fixed-point product to nearest before the >> 24.
    uint32_t scale = (1 << 24) / kernelSize;
    int new_width = width + SkMax32(leftRadius, rightRadius) * 2;
    // When transposing, stepping in x moves down a column of the output.
    int dst_x_stride = transpose ? height : 1;
    int dst_y_stride = transpose ? 1 : new_width;
    uint32_t half = 1 << 23;
    for (int y = 0; y < height; ++y) {
        // Running sum of the source pixels currently inside the window.
        uint32_t sum = 0;
        uint8_t* dptr = dst + y * dst_y_stride;
        // "right" is the next pixel to enter the window, "left" the next one
        // to leave it.
        const uint8_t* right = src + y * src_y_stride;
        const uint8_t* left = right;
        // Leading zero padding, emitted only when rightRadius > leftRadius.
        for (int x = 0; x < rightRadius - leftRadius; x++) {
            *dptr = 0;
            dptr += dst_x_stride;
        }
        // Ramp-up: pixels enter the window, none leave yet.
#define LEFT_BORDER_ITER \
            sum += *right++; \
            *dptr = (sum * scale + half) >> 24; \
            dptr += dst_x_stride;

        int x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
            LEFT_BORDER_ITER
        }
#endif
        // Remainder of the ramp-up not handled by the unrolled loop.
        for (; x < border; ++x) {
            LEFT_BORDER_ITER
        }
#undef LEFT_BORDER_ITER
        // Plateau for rows narrower than the kernel (width < diameter): the
        // whole row is already inside the window, so the sum stays constant.
#define TRIVIAL_ITER \
            *dptr = (sum * scale + half) >> 24; \
            dptr += dst_x_stride;
        x = width;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < diameter - 16; x += 16) {
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
            TRIVIAL_ITER
        }
#endif
        for (; x < diameter; ++x) {
            TRIVIAL_ITER
        }
#undef TRIVIAL_ITER
        // Steady state for rows wider than the kernel (width > diameter): one
        // pixel enters and one pixel leaves the window per output pixel.
#define CENTER_ITER \
            sum += *right++; \
            *dptr = (sum * scale + half) >> 24; \
            sum -= *left++; \
            dptr += dst_x_stride;

        x = diameter;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < width - 16; x += 16) {
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
            CENTER_ITER
        }
#endif
        for (; x < width; ++x) {
            CENTER_ITER
        }
#undef CENTER_ITER
        // Ramp-down: pixels leave the window until it is empty again.
#define RIGHT_BORDER_ITER \
            *dptr = (sum * scale + half) >> 24; \
            sum -= *left++; \
            dptr += dst_x_stride;

        x = 0;
#ifdef UNROLL_SEPARABLE_LOOPS
        for (; x < border - 16; x += 16) {
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
            RIGHT_BORDER_ITER
        }
#endif
        for (; x < border; ++x) {
            RIGHT_BORDER_ITER
        }
#undef RIGHT_BORDER_ITER
        // Trailing zero padding, emitted only when leftRadius > rightRadius.
        for (int x = 0; x < leftRadius - rightRadius; ++x) {
            *dptr = 0;
            dptr += dst_x_stride;
        }
        // Every pixel added to the window has been removed again.
        SkASSERT(sum == 0);
    }
    return new_width;
}
    226 
    227 /**
    228  * This variant of the box blur handles blurring of non-integer radii.  It
    229  * keeps two running sums: an outer sum for the rounded-up kernel radius, and
    230  * an inner sum for the rounded-down kernel radius.  For each pixel, it linearly
    231  * interpolates between them.  In float this would be:
    232  *  outer_weight * outer_sum / kernelSize +
    233  *  (1.0 - outer_weight) * innerSum / (kernelSize - 2)
    234  *
    235  * This is what the inner loop looks like before unrolling, and with the two
    236  * cases broken out separately (width < diameter, width >= diameter):
    237  *
    238  *      if (width < diameter) {
    239  *          for (int x = 0; x < width; x++) {
    240  *              inner_sum = outer_sum;
    241  *              outer_sum += *right++;
    242  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    243  *              dptr += dst_x_stride;
    244  *          }
    245  *          for (int x = width; x < diameter; ++x) {
    246  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    247  *              dptr += dst_x_stride;
    248  *          }
    249  *          for (int x = 0; x < width; x++) {
    250  *              inner_sum = outer_sum - *left++;
    251  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    252  *              dptr += dst_x_stride;
    253  *              outer_sum = inner_sum;
    254  *          }
    255  *      } else {
    256  *          for (int x = 0; x < diameter; x++) {
    257  *              inner_sum = outer_sum;
    258  *              outer_sum += *right++;
    259  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    260  *              dptr += dst_x_stride;
    261  *          }
    262  *          for (int x = diameter; x < width; ++x) {
    263  *              inner_sum = outer_sum - *left;
    264  *              outer_sum += *right++;
    265  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    266  *              dptr += dst_x_stride;
    267  *              outer_sum -= *left++;
    268  *          }
    269  *          for (int x = 0; x < diameter; x++) {
    270  *              inner_sum = outer_sum - *left++;
    271  *              *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    272  *              dptr += dst_x_stride;
    273  *              outer_sum = inner_sum;
    274  *          }
    275  *      }
    276  *  }
    277  *  return new_width;
    278  */
    279 
    280 static int boxBlurInterp(const uint8_t* src, int src_y_stride, uint8_t* dst,
    281                          int radius, int width, int height,
    282                          bool transpose, uint8_t outer_weight)
    283 {
    284     int diameter = radius * 2;
    285     int kernelSize = diameter + 1;
    286     int border = SkMin32(width, diameter);
    287     int inner_weight = 255 - outer_weight;
    288     outer_weight += outer_weight >> 7;
    289     inner_weight += inner_weight >> 7;
    290     uint32_t outer_scale = (outer_weight << 16) / kernelSize;
    291     uint32_t inner_scale = (inner_weight << 16) / (kernelSize - 2);
    292     uint32_t half = 1 << 23;
    293     int new_width = width + diameter;
    294     int dst_x_stride = transpose ? height : 1;
    295     int dst_y_stride = transpose ? 1 : new_width;
    296     for (int y = 0; y < height; ++y) {
    297         uint32_t outer_sum = 0, inner_sum = 0;
    298         uint8_t* dptr = dst + y * dst_y_stride;
    299         const uint8_t* right = src + y * src_y_stride;
    300         const uint8_t* left = right;
    301         int x = 0;
    302 
    303 #define LEFT_BORDER_ITER \
    304             inner_sum = outer_sum; \
    305             outer_sum += *right++; \
    306             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    307             dptr += dst_x_stride;
    308 
    309 #ifdef UNROLL_SEPARABLE_LOOPS
    310         for (;x < border - 16; x += 16) {
    311             LEFT_BORDER_ITER
    312             LEFT_BORDER_ITER
    313             LEFT_BORDER_ITER
    314             LEFT_BORDER_ITER
    315             LEFT_BORDER_ITER
    316             LEFT_BORDER_ITER
    317             LEFT_BORDER_ITER
    318             LEFT_BORDER_ITER
    319             LEFT_BORDER_ITER
    320             LEFT_BORDER_ITER
    321             LEFT_BORDER_ITER
    322             LEFT_BORDER_ITER
    323             LEFT_BORDER_ITER
    324             LEFT_BORDER_ITER
    325             LEFT_BORDER_ITER
    326             LEFT_BORDER_ITER
    327         }
    328 #endif
    329 
    330         for (;x < border; ++x) {
    331             LEFT_BORDER_ITER
    332         }
    333 #undef LEFT_BORDER_ITER
    334         for (int x = width; x < diameter; ++x) {
    335             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24;
    336             dptr += dst_x_stride;
    337         }
    338         x = diameter;
    339 
    340 #define CENTER_ITER \
    341             inner_sum = outer_sum - *left; \
    342             outer_sum += *right++; \
    343             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    344             dptr += dst_x_stride; \
    345             outer_sum -= *left++;
    346 
    347 #ifdef UNROLL_SEPARABLE_LOOPS
    348         for (; x < width - 16; x += 16) {
    349             CENTER_ITER
    350             CENTER_ITER
    351             CENTER_ITER
    352             CENTER_ITER
    353             CENTER_ITER
    354             CENTER_ITER
    355             CENTER_ITER
    356             CENTER_ITER
    357             CENTER_ITER
    358             CENTER_ITER
    359             CENTER_ITER
    360             CENTER_ITER
    361             CENTER_ITER
    362             CENTER_ITER
    363             CENTER_ITER
    364             CENTER_ITER
    365         }
    366 #endif
    367         for (; x < width; ++x) {
    368             CENTER_ITER
    369         }
    370 #undef CENTER_ITER
    371 
    372         #define RIGHT_BORDER_ITER \
    373             inner_sum = outer_sum - *left++; \
    374             *dptr = (outer_sum * outer_scale + inner_sum * inner_scale + half) >> 24; \
    375             dptr += dst_x_stride; \
    376             outer_sum = inner_sum;
    377 
    378         x = 0;
    379 #ifdef UNROLL_SEPARABLE_LOOPS
    380         for (; x < border - 16; x += 16) {
    381             RIGHT_BORDER_ITER
    382             RIGHT_BORDER_ITER
    383             RIGHT_BORDER_ITER
    384             RIGHT_BORDER_ITER
    385             RIGHT_BORDER_ITER
    386             RIGHT_BORDER_ITER
    387             RIGHT_BORDER_ITER
    388             RIGHT_BORDER_ITER
    389             RIGHT_BORDER_ITER
    390             RIGHT_BORDER_ITER
    391             RIGHT_BORDER_ITER
    392             RIGHT_BORDER_ITER
    393             RIGHT_BORDER_ITER
    394             RIGHT_BORDER_ITER
    395             RIGHT_BORDER_ITER
    396             RIGHT_BORDER_ITER
    397         }
    398 #endif
    399         for (; x < border; ++x) {
    400             RIGHT_BORDER_ITER
    401         }
    402 #undef RIGHT_BORDER_ITER
    403         SkASSERT(outer_sum == 0 && inner_sum == 0);
    404     }
    405     return new_width;
    406 }
    407 
    408 static void get_adjusted_radii(SkScalar passRadius, int *loRadius, int *hiRadius)
    409 {
    410     *loRadius = *hiRadius = SkScalarCeilToInt(passRadius);
    411     if (SkIntToScalar(*hiRadius) - passRadius > 0.5f) {
    412         *loRadius = *hiRadius - 1;
    413     }
    414 }
    415 
    416 #include "SkColorPriv.h"
    417 
    418 static void merge_src_with_blur(uint8_t dst[], int dstRB,
    419                                 const uint8_t src[], int srcRB,
    420                                 const uint8_t blur[], int blurRB,
    421                                 int sw, int sh) {
    422     dstRB -= sw;
    423     srcRB -= sw;
    424     blurRB -= sw;
    425     while (--sh >= 0) {
    426         for (int x = sw - 1; x >= 0; --x) {
    427             *dst = SkToU8(SkAlphaMul(*blur, SkAlpha255To256(*src)));
    428             dst += 1;
    429             src += 1;
    430             blur += 1;
    431         }
    432         dst += dstRB;
    433         src += srcRB;
    434         blur += blurRB;
    435     }
    436 }
    437 
    438 static void clamp_with_orig(uint8_t dst[], int dstRowBytes,
    439                             const uint8_t src[], int srcRowBytes,
    440                             int sw, int sh,
    441                             SkBlurStyle style) {
    442     int x;
    443     while (--sh >= 0) {
    444         switch (style) {
    445         case kSolid_SkBlurStyle:
    446             for (x = sw - 1; x >= 0; --x) {
    447                 int s = *src;
    448                 int d = *dst;
    449                 *dst = SkToU8(s + d - SkMulDiv255Round(s, d));
    450                 dst += 1;
    451                 src += 1;
    452             }
    453             break;
    454         case kOuter_SkBlurStyle:
    455             for (x = sw - 1; x >= 0; --x) {
    456                 if (*src) {
    457                     *dst = SkToU8(SkAlphaMul(*dst, SkAlpha255To256(255 - *src)));
    458                 }
    459                 dst += 1;
    460                 src += 1;
    461             }
    462             break;
    463         default:
    464             SkDEBUGFAIL("Unexpected blur style here");
    465             break;
    466         }
    467         dst += dstRowBytes - sw;
    468         src += srcRowBytes - sw;
    469     }
    470 }
    471 
    472 ///////////////////////////////////////////////////////////////////////////////
    473 
// we use a local function to wrap the class static method to work around
// a bug in gcc98
//
// Forwards to SkMask::FreeImage(image).  SkBlurMask::BoxBlur() uses it as the
// cleanup procedure of an SkAutoTCallVProc guarding the blur buffer.  The
// separate declaration below gives the non-static function a prior prototype.
void SkMask_FreeImage(uint8_t* image);
void SkMask_FreeImage(uint8_t* image) {
    SkMask::FreeImage(image);
}
    480 
    481 bool SkBlurMask::BoxBlur(SkMask* dst, const SkMask& src,
    482                          SkScalar sigma, SkBlurStyle style, SkBlurQuality quality,
    483                          SkIPoint* margin, bool force_quality) {
    484 
    485     if (src.fFormat != SkMask::kA8_Format) {
    486         return false;
    487     }
    488 
    489     // Force high quality off for small radii (performance)
    490     if (!force_quality && sigma <= SkIntToScalar(2)) {
    491         quality = kLow_SkBlurQuality;
    492     }
    493 
    494     SkScalar passRadius;
    495     if (kHigh_SkBlurQuality == quality) {
    496         // For the high quality path the 3 pass box blur kernel width is
    497         // 6*rad+1 while the full Gaussian width is 6*sigma.
    498         passRadius = sigma - (1/6.0f);
    499     } else {
    500         // For the low quality path we only attempt to cover 3*sigma of the
    501         // Gaussian blur area (1.5*sigma on each side). The single pass box
    502         // blur's kernel size is 2*rad+1.
    503         passRadius = 1.5f*sigma - 0.5f;
    504     }
    505 
    506     // highQuality: use three box blur passes as a cheap way
    507     // to approximate a Gaussian blur
    508     int passCount = (kHigh_SkBlurQuality == quality) ? 3 : 1;
    509 
    510     int rx = SkScalarCeilToInt(passRadius);
    511     int outerWeight = 255 - SkScalarRoundToInt((SkIntToScalar(rx) - passRadius) * 255);
    512 
    513     SkASSERT(rx >= 0);
    514     SkASSERT((unsigned)outerWeight <= 255);
    515     if (rx <= 0) {
    516         return false;
    517     }
    518 
    519     int ry = rx;    // only do square blur for now
    520 
    521     int padx = passCount * rx;
    522     int pady = passCount * ry;
    523 
    524     if (margin) {
    525         margin->set(padx, pady);
    526     }
    527     dst->fBounds.set(src.fBounds.fLeft - padx, src.fBounds.fTop - pady,
    528                      src.fBounds.fRight + padx, src.fBounds.fBottom + pady);
    529 
    530     dst->fRowBytes = dst->fBounds.width();
    531     dst->fFormat = SkMask::kA8_Format;
    532     dst->fImage = nullptr;
    533 
    534     if (src.fImage) {
    535         size_t dstSize = dst->computeImageSize();
    536         if (0 == dstSize) {
    537             return false;   // too big to allocate, abort
    538         }
    539 
    540         int             sw = src.fBounds.width();
    541         int             sh = src.fBounds.height();
    542         const uint8_t*  sp = src.fImage;
    543         uint8_t*        dp = SkMask::AllocImage(dstSize);
    544         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dp);
    545 
    546         // build the blurry destination
    547         SkAutoTMalloc<uint8_t>  tmpBuffer(dstSize);
    548         uint8_t*                tp = tmpBuffer.get();
    549         int w = sw, h = sh;
    550 
    551         if (outerWeight == 255) {
    552             int loRadius, hiRadius;
    553             get_adjusted_radii(passRadius, &loRadius, &hiRadius);
    554             if (kHigh_SkBlurQuality == quality) {
    555                 // Do three X blurs, with a transpose on the final one.
    556                 w = boxBlur(sp, src.fRowBytes, tp, loRadius, hiRadius, w, h, false);
    557                 w = boxBlur(tp, w,             dp, hiRadius, loRadius, w, h, false);
    558                 w = boxBlur(dp, w,             tp, hiRadius, hiRadius, w, h, true);
    559                 // Do three Y blurs, with a transpose on the final one.
    560                 h = boxBlur(tp, h,             dp, loRadius, hiRadius, h, w, false);
    561                 h = boxBlur(dp, h,             tp, hiRadius, loRadius, h, w, false);
    562                 h = boxBlur(tp, h,             dp, hiRadius, hiRadius, h, w, true);
    563             } else {
    564                 w = boxBlur(sp, src.fRowBytes, tp, rx, rx, w, h, true);
    565                 h = boxBlur(tp, h,             dp, ry, ry, h, w, true);
    566             }
    567         } else {
    568             if (kHigh_SkBlurQuality == quality) {
    569                 // Do three X blurs, with a transpose on the final one.
    570                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, false, outerWeight);
    571                 w = boxBlurInterp(tp, w,             dp, rx, w, h, false, outerWeight);
    572                 w = boxBlurInterp(dp, w,             tp, rx, w, h, true, outerWeight);
    573                 // Do three Y blurs, with a transpose on the final one.
    574                 h = boxBlurInterp(tp, h,             dp, ry, h, w, false, outerWeight);
    575                 h = boxBlurInterp(dp, h,             tp, ry, h, w, false, outerWeight);
    576                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    577             } else {
    578                 w = boxBlurInterp(sp, src.fRowBytes, tp, rx, w, h, true, outerWeight);
    579                 h = boxBlurInterp(tp, h,             dp, ry, h, w, true, outerWeight);
    580             }
    581         }
    582 
    583         dst->fImage = dp;
    584         // if need be, alloc the "real" dst (same size as src) and copy/merge
    585         // the blur into it (applying the src)
    586         if (style == kInner_SkBlurStyle) {
    587             // now we allocate the "real" dst, mirror the size of src
    588             size_t srcSize = src.computeImageSize();
    589             if (0 == srcSize) {
    590                 return false;   // too big to allocate, abort
    591             }
    592             dst->fImage = SkMask::AllocImage(srcSize);
    593             merge_src_with_blur(dst->fImage, src.fRowBytes,
    594                                 sp, src.fRowBytes,
    595                                 dp + passCount * (rx + ry * dst->fRowBytes),
    596                                 dst->fRowBytes, sw, sh);
    597             SkMask::FreeImage(dp);
    598         } else if (style != kNormal_SkBlurStyle) {
    599             clamp_with_orig(dp + passCount * (rx + ry * dst->fRowBytes),
    600                             dst->fRowBytes, sp, src.fRowBytes, sw, sh, style);
    601         }
    602         (void)autoCall.release();
    603     }
    604 
    605     if (style == kInner_SkBlurStyle) {
    606         dst->fBounds = src.fBounds; // restore trimmed bounds
    607         dst->fRowBytes = src.fRowBytes;
    608     }
    609 
    610     return true;
    611 }
    612 
    613 /* Convolving a box with itself three times results in a piecewise
    614    quadratic function:
    615 
    616    0                              x <= -1.5
    617    9/8 + 3/2 x + 1/2 x^2   -1.5 < x <= -.5
    618    3/4 - x^2                -.5 < x <= .5
    619    9/8 - 3/2 x + 1/2 x^2    0.5 < x <= 1.5
    620    0                        1.5 < x
    621 
    622    Mathematica:
    623 
    624    g[x_] := Piecewise [ {
    625      {9/8 + 3/2 x + 1/2 x^2 ,  -1.5 < x <= -.5},
    626      {3/4 - x^2             ,   -.5 < x <= .5},
    627      {9/8 - 3/2 x + 1/2 x^2 ,   0.5 < x <= 1.5}
    628    }, 0]
    629 
    630    To get the profile curve of the blurred step function at the rectangle
    631    edge, we evaluate the indefinite integral, which is piecewise cubic:
    632 
    633    0                                        x <= -1.5
    634    9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3   -1.5 < x <= -0.5
    635    1/2 + 3/4 x - 1/3 x^3              -.5 < x <= .5
    636    7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3     .5 < x <= 1.5
    637    1                                  1.5 < x
    638 
    639    in Mathematica code:
    640 
    641    gi[x_] := Piecewise[ {
    642      { 0 , x <= -1.5 },
    643      { 9/16 + 9/8 x + 3/4 x^2 + 1/6 x^3, -1.5 < x <= -0.5 },
    644      { 1/2 + 3/4 x - 1/3 x^3          ,  -.5 < x <= .5},
    645      { 7/16 + 9/8 x - 3/4 x^2 + 1/6 x^3,   .5 < x <= 1.5}
    646    },1]
    647 */
    648 
    649 static float gaussianIntegral(float x) {
    650     if (x > 1.5f) {
    651         return 0.0f;
    652     }
    653     if (x < -1.5f) {
    654         return 1.0f;
    655     }
    656 
    657     float x2 = x*x;
    658     float x3 = x2*x;
    659 
    660     if ( x > 0.5f ) {
    661         return 0.5625f - (x3 / 6.0f - 3.0f * x2 * 0.25f + 1.125f * x);
    662     }
    663     if ( x > -0.5f ) {
    664         return 0.5f - (0.75f * x - x3 / 3.0f);
    665     }
    666     return 0.4375f + (-x3 / 6.0f - 3.0f * x2 * 0.25f - 1.125f * x);
    667 }
    668 
    669 /*  ComputeBlurProfile allocates and fills in an array of floating
    670     point values between 0 and 255 for the profile signature of
    671     a blurred half-plane with the given blur radius.  Since we're
    672     going to be doing screened multiplications (i.e., 1 - (1-x)(1-y))
    673     all the time, we actually fill in the profile pre-inverted
    674     (already done 255-x).
    675 
    676     It's the responsibility of the caller to delete the
    677     memory returned in profile_out.
    678 */
    679 
    680 uint8_t* SkBlurMask::ComputeBlurProfile(SkScalar sigma) {
    681     int size = SkScalarCeilToInt(6*sigma);
    682 
    683     int center = size >> 1;
    684     uint8_t* profile = new uint8_t[size];
    685 
    686     float invr = 1.f/(2*sigma);
    687 
    688     profile[0] = 255;
    689     for (int x = 1 ; x < size ; ++x) {
    690         float scaled_x = (center - x - .5f) * invr;
    691         float gi = gaussianIntegral(scaled_x);
    692         profile[x] = 255 - (uint8_t) (255.f * gi);
    693     }
    694 
    695     return profile;
    696 }
    697 
    698 // TODO MAYBE: Maintain a profile cache to avoid recomputing this for
    699 // commonly used radii.  Consider baking some of the most common blur radii
    700 // directly in as static data?
    701 
    702 // Implementation adapted from Michael Herf's approach:
    703 // http://stereopsis.com/shadowrect/
    704 
    705 uint8_t SkBlurMask::ProfileLookup(const uint8_t *profile, int loc, int blurred_width, int sharp_width) {
    706     int dx = SkAbs32(((loc << 1) + 1) - blurred_width) - sharp_width; // how far are we from the original edge?
    707     int ox = dx >> 1;
    708     if (ox < 0) {
    709         ox = 0;
    710     }
    711 
    712     return profile[ox];
    713 }
    714 
    715 void SkBlurMask::ComputeBlurredScanline(uint8_t *pixels, const uint8_t *profile,
    716                                         unsigned int width, SkScalar sigma) {
    717 
    718     unsigned int profile_size = SkScalarCeilToInt(6*sigma);
    719     SkAutoTMalloc<uint8_t> horizontalScanline(width);
    720 
    721     unsigned int sw = width - profile_size;
    722     // nearest odd number less than the profile size represents the center
    723     // of the (2x scaled) profile
    724     int center = ( profile_size & ~1 ) - 1;
    725 
    726     int w = sw - center;
    727 
    728     for (unsigned int x = 0 ; x < width ; ++x) {
    729        if (profile_size <= sw) {
    730            pixels[x] = ProfileLookup(profile, x, width, w);
    731        } else {
    732            float span = float(sw)/(2*sigma);
    733            float giX = 1.5f - (x+.5f)/(2*sigma);
    734            pixels[x] = (uint8_t) (255 * (gaussianIntegral(giX) - gaussianIntegral(giX + span)));
    735        }
    736     }
    737 }
    738 
    739 bool SkBlurMask::BlurRect(SkScalar sigma, SkMask *dst,
    740                           const SkRect &src, SkBlurStyle style,
    741                           SkIPoint *margin, SkMask::CreateMode createMode) {
    742     int profile_size = SkScalarCeilToInt(6*sigma);
    743 
    744     int pad = profile_size/2;
    745     if (margin) {
    746         margin->set( pad, pad );
    747     }
    748 
    749     dst->fBounds.set(SkScalarRoundToInt(src.fLeft - pad),
    750                      SkScalarRoundToInt(src.fTop - pad),
    751                      SkScalarRoundToInt(src.fRight + pad),
    752                      SkScalarRoundToInt(src.fBottom + pad));
    753 
    754     dst->fRowBytes = dst->fBounds.width();
    755     dst->fFormat = SkMask::kA8_Format;
    756     dst->fImage = nullptr;
    757 
    758     int             sw = SkScalarFloorToInt(src.width());
    759     int             sh = SkScalarFloorToInt(src.height());
    760 
    761     if (createMode == SkMask::kJustComputeBounds_CreateMode) {
    762         if (style == kInner_SkBlurStyle) {
    763             dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    764                              SkScalarRoundToInt(src.fTop),
    765                              SkScalarRoundToInt(src.fRight),
    766                              SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    767             dst->fRowBytes = sw;
    768         }
    769         return true;
    770     }
    771 
    772     std::unique_ptr<uint8_t[]> profile(ComputeBlurProfile(sigma));
    773 
    774     size_t dstSize = dst->computeImageSize();
    775     if (0 == dstSize) {
    776         return false;   // too big to allocate, abort
    777     }
    778 
    779     uint8_t*        dp = SkMask::AllocImage(dstSize);
    780 
    781     dst->fImage = dp;
    782 
    783     int dstHeight = dst->fBounds.height();
    784     int dstWidth = dst->fBounds.width();
    785 
    786     uint8_t *outptr = dp;
    787 
    788     SkAutoTMalloc<uint8_t> horizontalScanline(dstWidth);
    789     SkAutoTMalloc<uint8_t> verticalScanline(dstHeight);
    790 
    791     ComputeBlurredScanline(horizontalScanline, profile.get(), dstWidth, sigma);
    792     ComputeBlurredScanline(verticalScanline, profile.get(), dstHeight, sigma);
    793 
    794     for (int y = 0 ; y < dstHeight ; ++y) {
    795         for (int x = 0 ; x < dstWidth ; x++) {
    796             unsigned int maskval = SkMulDiv255Round(horizontalScanline[x], verticalScanline[y]);
    797             *(outptr++) = maskval;
    798         }
    799     }
    800 
    801     if (style == kInner_SkBlurStyle) {
    802         // now we allocate the "real" dst, mirror the size of src
    803         size_t srcSize = (size_t)(src.width() * src.height());
    804         if (0 == srcSize) {
    805             return false;   // too big to allocate, abort
    806         }
    807         dst->fImage = SkMask::AllocImage(srcSize);
    808         for (int y = 0 ; y < sh ; y++) {
    809             uint8_t *blur_scanline = dp + (y+pad)*dstWidth + pad;
    810             uint8_t *inner_scanline = dst->fImage + y*sw;
    811             memcpy(inner_scanline, blur_scanline, sw);
    812         }
    813         SkMask::FreeImage(dp);
    814 
    815         dst->fBounds.set(SkScalarRoundToInt(src.fLeft),
    816                          SkScalarRoundToInt(src.fTop),
    817                          SkScalarRoundToInt(src.fRight),
    818                          SkScalarRoundToInt(src.fBottom)); // restore trimmed bounds
    819         dst->fRowBytes = sw;
    820 
    821     } else if (style == kOuter_SkBlurStyle) {
    822         for (int y = pad ; y < dstHeight-pad ; y++) {
    823             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    824             memset(dst_scanline, 0, sw);
    825         }
    826     } else if (style == kSolid_SkBlurStyle) {
    827         for (int y = pad ; y < dstHeight-pad ; y++) {
    828             uint8_t *dst_scanline = dp + y*dstWidth + pad;
    829             memset(dst_scanline, 0xff, sw);
    830         }
    831     }
    832     // normal and solid styles are the same for analytic rect blurs, so don't
    833     // need to handle solid specially.
    834 
    835     return true;
    836 }
    837 
    838 bool SkBlurMask::BlurRRect(SkScalar sigma, SkMask *dst,
    839                            const SkRRect &src, SkBlurStyle style,
    840                            SkIPoint *margin, SkMask::CreateMode createMode) {
    841     // Temporary for now -- always fail, should cause caller to fall back
    842     // to old path.  Plumbing just to land API and parallelize effort.
    843 
    844     return false;
    845 }
    846 
    847 // The "simple" blur is a direct implementation of separable convolution with a discrete
    848 // gaussian kernel.  It's "ground truth" in a sense; too slow to be used, but very
    849 // useful for correctness comparisons.
    850 
    851 bool SkBlurMask::BlurGroundTruth(SkScalar sigma, SkMask* dst, const SkMask& src,
    852                                  SkBlurStyle style, SkIPoint* margin) {
    853 
    854     if (src.fFormat != SkMask::kA8_Format) {
    855         return false;
    856     }
    857 
    858     float variance = sigma * sigma;
    859 
    860     int windowSize = SkScalarCeilToInt(sigma*6);
    861     // round window size up to nearest odd number
    862     windowSize |= 1;
    863 
    864     SkAutoTMalloc<float> gaussWindow(windowSize);
    865 
    866     int halfWindow = windowSize >> 1;
    867 
    868     gaussWindow[halfWindow] = 1;
    869 
    870     float windowSum = 1;
    871     for (int x = 1 ; x <= halfWindow ; ++x) {
    872         float gaussian = expf(-x*x / (2*variance));
    873         gaussWindow[halfWindow + x] = gaussWindow[halfWindow-x] = gaussian;
    874         windowSum += 2*gaussian;
    875     }
    876 
    877     // leave the filter un-normalized for now; we will divide by the normalization
    878     // sum later;
    879 
    880     int pad = halfWindow;
    881     if (margin) {
    882         margin->set( pad, pad );
    883     }
    884 
    885     dst->fBounds = src.fBounds;
    886     dst->fBounds.outset(pad, pad);
    887 
    888     dst->fRowBytes = dst->fBounds.width();
    889     dst->fFormat = SkMask::kA8_Format;
    890     dst->fImage = nullptr;
    891 
    892     if (src.fImage) {
    893 
    894         size_t dstSize = dst->computeImageSize();
    895         if (0 == dstSize) {
    896             return false;   // too big to allocate, abort
    897         }
    898 
    899         int             srcWidth = src.fBounds.width();
    900         int             srcHeight = src.fBounds.height();
    901         int             dstWidth = dst->fBounds.width();
    902 
    903         const uint8_t*  srcPixels = src.fImage;
    904         uint8_t*        dstPixels = SkMask::AllocImage(dstSize);
    905         SkAutoTCallVProc<uint8_t, SkMask_FreeImage> autoCall(dstPixels);
    906 
    907         // do the actual blur.  First, make a padded copy of the source.
    908         // use double pad so we never have to check if we're outside anything
    909 
    910         int padWidth = srcWidth + 4*pad;
    911         int padHeight = srcHeight;
    912         int padSize = padWidth * padHeight;
    913 
    914         SkAutoTMalloc<uint8_t> padPixels(padSize);
    915         memset(padPixels, 0, padSize);
    916 
    917         for (int y = 0 ; y < srcHeight; ++y) {
    918             uint8_t* padptr = padPixels + y * padWidth + 2*pad;
    919             const uint8_t* srcptr = srcPixels + y * srcWidth;
    920             memcpy(padptr, srcptr, srcWidth);
    921         }
    922 
    923         // blur in X, transposing the result into a temporary floating point buffer.
    924         // also double-pad the intermediate result so that the second blur doesn't
    925         // have to do extra conditionals.
    926 
    927         int tmpWidth = padHeight + 4*pad;
    928         int tmpHeight = padWidth - 2*pad;
    929         int tmpSize = tmpWidth * tmpHeight;
    930 
    931         SkAutoTMalloc<float> tmpImage(tmpSize);
    932         memset(tmpImage, 0, tmpSize*sizeof(tmpImage[0]));
    933 
    934         for (int y = 0 ; y < padHeight ; ++y) {
    935             uint8_t *srcScanline = padPixels + y*padWidth;
    936             for (int x = pad ; x < padWidth - pad ; ++x) {
    937                 float *outPixel = tmpImage + (x-pad)*tmpWidth + y + 2*pad; // transposed output
    938                 uint8_t *windowCenter = srcScanline + x;
    939                 for (int i = -pad ; i <= pad ; ++i) {
    940                     *outPixel += gaussWindow[pad+i]*windowCenter[i];
    941                 }
    942                 *outPixel /= windowSum;
    943             }
    944         }
    945 
    946         // blur in Y; now filling in the actual desired destination.  We have to do
    947         // the transpose again; these transposes guarantee that we read memory in
    948         // linear order.
    949 
    950         for (int y = 0 ; y < tmpHeight ; ++y) {
    951             float *srcScanline = tmpImage + y*tmpWidth;
    952             for (int x = pad ; x < tmpWidth - pad ; ++x) {
    953                 float *windowCenter = srcScanline + x;
    954                 float finalValue = 0;
    955                 for (int i = -pad ; i <= pad ; ++i) {
    956                     finalValue += gaussWindow[pad+i]*windowCenter[i];
    957                 }
    958                 finalValue /= windowSum;
    959                 uint8_t *outPixel = dstPixels + (x-pad)*dstWidth + y; // transposed output
    960                 int integerPixel = int(finalValue + 0.5f);
    961                 *outPixel = SkClampMax( SkClampPos(integerPixel), 255 );
    962             }
    963         }
    964 
    965         dst->fImage = dstPixels;
    966         // if need be, alloc the "real" dst (same size as src) and copy/merge
    967         // the blur into it (applying the src)
    968         if (style == kInner_SkBlurStyle) {
    969             // now we allocate the "real" dst, mirror the size of src
    970             size_t srcSize = src.computeImageSize();
    971             if (0 == srcSize) {
    972                 return false;   // too big to allocate, abort
    973             }
    974             dst->fImage = SkMask::AllocImage(srcSize);
    975             merge_src_with_blur(dst->fImage, src.fRowBytes,
    976                 srcPixels, src.fRowBytes,
    977                 dstPixels + pad*dst->fRowBytes + pad,
    978                 dst->fRowBytes, srcWidth, srcHeight);
    979             SkMask::FreeImage(dstPixels);
    980         } else if (style != kNormal_SkBlurStyle) {
    981             clamp_with_orig(dstPixels + pad*dst->fRowBytes + pad,
    982                 dst->fRowBytes, srcPixels, src.fRowBytes, srcWidth, srcHeight, style);
    983         }
    984         (void)autoCall.release();
    985     }
    986 
    987     if (style == kInner_SkBlurStyle) {
    988         dst->fBounds = src.fBounds; // restore trimmed bounds
    989         dst->fRowBytes = src.fRowBytes;
    990     }
    991 
    992     return true;
    993 }
    994