Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2014 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmap.h"
      9 #include "SkBlurImage_opts_SSE4.h"
     10 #include "SkColorPriv.h"
     11 #include "SkRect.h"
     12 
/* Except on compilers that don't support it, we always build the SSE4
 * functions and leave it to the caller to determine SSE4 support.  For
 * compilers that do not support SSE4.x we provide a stub implementation instead.
 */
     17 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
     18 
     19 #include <smmintrin.h>
     20 
     21 namespace {
     22 enum BlurDirection {
     23     kX, kY
     24 };
     25 
     26 /* Helper function to spread the components of a 32-bit integer into the
     27  * lower 8 bits of each 32-bit element of an SSE register.
     28  */
     29 inline __m128i expand(int a) {
     30     const __m128i zero = _mm_setzero_si128();
     31 
     32     // 0 0 0 0   0 0 0 0   0 0 0 0   A R G B
     33     __m128i result = _mm_cvtsi32_si128(a);
     34 
     35     // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
     36     result = _mm_unpacklo_epi8(result, zero);
     37 
     38     // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
     39     return _mm_unpacklo_epi16(result, zero);
     40 }
     41 
     42 template<BlurDirection srcDirection, BlurDirection dstDirection>
     43 void SkBoxBlur_SSE4(const SkPMColor* src, int srcStride, SkPMColor* dst, int kernelSize,
     44                     int leftOffset, int rightOffset, int width, int height)
     45 {
     46     const int rightBorder = SkMin32(rightOffset + 1, width);
     47     const int srcStrideX = srcDirection == kX ? 1 : srcStride;
     48     const int dstStrideX = dstDirection == kX ? 1 : height;
     49     const int srcStrideY = srcDirection == kX ? srcStride : 1;
     50     const int dstStrideY = dstDirection == kX ? width : 1;
     51     const __m128i scale = _mm_set1_epi32((1 << 24) / kernelSize);
     52     const __m128i half = _mm_set1_epi32(1 << 23);
     53     const __m128i zero = _mm_setzero_si128();
     54     for (int y = 0; y < height; ++y) {
     55         __m128i sum = zero;
     56         const SkPMColor* p = src;
     57         for (int i = 0; i < rightBorder; ++i) {
     58             sum = _mm_add_epi32(sum, expand(*p));
     59             p += srcStrideX;
     60         }
     61 
     62         const SkPMColor* sptr = src;
     63         SkColor* dptr = dst;
     64         for (int x = 0; x < width; ++x) {
     65             __m128i result = _mm_mullo_epi32(sum, scale);
     66 
     67             // sumA*scale+.5 sumB*scale+.5 sumG*scale+.5 sumB*scale+.5
     68             result = _mm_add_epi32(result, half);
     69 
     70             // 0 0 0 A   0 0 0 R   0 0 0 G   0 0 0 B
     71             result = _mm_srli_epi32(result, 24);
     72 
     73             // 0 0 0 0   0 0 0 0   0 A 0 R   0 G 0 B
     74             result = _mm_packs_epi32(result, zero);
     75 
     76             // 0 0 0 0   0 0 0 0   0 0 0 0   A R G B
     77             result = _mm_packus_epi16(result, zero);
     78             *dptr = _mm_cvtsi128_si32(result);
     79             if (x >= leftOffset) {
     80                 SkColor l = *(sptr - leftOffset * srcStrideX);
     81                 sum = _mm_sub_epi32(sum, expand(l));
     82             }
     83             if (x + rightOffset + 1 < width) {
     84                 SkColor r = *(sptr + (rightOffset + 1) * srcStrideX);
     85                 sum = _mm_add_epi32(sum, expand(r));
     86             }
     87             sptr += srcStrideX;
     88             if (srcDirection == kY) {
     89                 _mm_prefetch(reinterpret_cast<const char*>(sptr + (rightOffset + 1) * srcStrideX),
     90                              _MM_HINT_T0);
     91             }
     92             dptr += dstStrideX;
     93         }
     94         src += srcStrideY;
     95         dst += dstStrideY;
     96     }
     97 }
     98 
     99 } // namespace
    100 
    101 bool SkBoxBlurGetPlatformProcs_SSE4(SkBoxBlurProc* boxBlurX,
    102                                     SkBoxBlurProc* boxBlurY,
    103                                     SkBoxBlurProc* boxBlurXY,
    104                                     SkBoxBlurProc* boxBlurYX) {
    105     *boxBlurX = SkBoxBlur_SSE4<kX, kX>;
    106     *boxBlurY = SkBoxBlur_SSE4<kY, kY>;
    107     *boxBlurXY = SkBoxBlur_SSE4<kX, kY>;
    108     *boxBlurYX = SkBoxBlur_SSE4<kY, kX>;
    109     return true;
    110 }
    111 
    112 #else // SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    113 
    114 bool SkBoxBlurGetPlatformProcs_SSE4(SkBoxBlurProc* boxBlurX,
    115                                     SkBoxBlurProc* boxBlurY,
    116                                     SkBoxBlurProc* boxBlurXY,
    117                                     SkBoxBlurProc* boxBlurYX) {
    118     sk_throw();
    119     return false;
    120 }
    121 
    122 
    123 #endif
    124