Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapFilter_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSE2.h"
     10 #include "SkBitmapProcState_opts_SSSE3.h"
     11 #include "SkBitmapScaler.h"
     12 #include "SkBlitMask.h"
     13 #include "SkBlitRect_opts_SSE2.h"
     14 #include "SkBlitRow.h"
     15 #include "SkBlitRow_opts_SSE2.h"
     16 #include "SkBlitRow_opts_SSE4.h"
     17 #include "SkBlurImage_opts_SSE2.h"
     18 #include "SkBlurImage_opts_SSE4.h"
     19 #include "SkMorphology_opts.h"
     20 #include "SkMorphology_opts_SSE2.h"
     21 #include "SkRTConf.h"
     22 #include "SkUtils.h"
     23 #include "SkUtils_opts_SSE2.h"
     24 #include "SkXfermode.h"
     25 #include "SkXfermode_proccoeff.h"
     26 
     27 #if defined(_MSC_VER) && defined(_WIN64)
     28 #include <intrin.h>
     29 #endif
     30 
     31 /* This file must *not* be compiled with -msse or any other optional SIMD
     32    extension, otherwise gcc may generate SIMD instructions even for scalar ops
     33    (and thus give an invalid instruction on Pentium3 on the code below).
     34    For example, only files named *_SSE2.cpp in this directory should be
     35    compiled with -msse2 or higher. */
     36 
     37 
     38 /* Function to get the CPU SSE-level in runtime, for different compilers. */
     39 #ifdef _MSC_VER
     40 static inline void getcpuid(int info_type, int info[4]) {
     41 #if defined(_WIN64)
     42     __cpuid(info, info_type);
     43 #else
     44     __asm {
     45         mov    eax, [info_type]
     46         cpuid
     47         mov    edi, [info]
     48         mov    [edi], eax
     49         mov    [edi+4], ebx
     50         mov    [edi+8], ecx
     51         mov    [edi+12], edx
     52     }
     53 #endif
     54 }
     55 #elif defined(__x86_64__)
     56 static inline void getcpuid(int info_type, int info[4]) {
     57     asm volatile (
     58         "cpuid \n\t"
     59         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     60         : "a"(info_type)
     61     );
     62 }
     63 #else
     64 static inline void getcpuid(int info_type, int info[4]) {
     65     // We save and restore ebx, so this code can be compatible with -fPIC
     66     asm volatile (
     67         "pushl %%ebx      \n\t"
     68         "cpuid            \n\t"
     69         "movl %%ebx, %1   \n\t"
     70         "popl %%ebx       \n\t"
     71         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     72         : "a"(info_type)
     73     );
     74 }
     75 #endif
     76 
     77 ////////////////////////////////////////////////////////////////////////////////
     78 
     79 /* Fetch the SIMD level directly from the CPU, at run-time.
     80  * Only checks the levels needed by the optimizations in this file.
     81  */
     82 static int get_SIMD_level() {
     83     int cpu_info[4] = { 0 };
     84 
     85     getcpuid(1, cpu_info);
     86     if ((cpu_info[2] & (1<<20)) != 0) {
     87         return SK_CPU_SSE_LEVEL_SSE42;
     88     } else if ((cpu_info[2] & (1<<19)) != 0) {
     89         return SK_CPU_SSE_LEVEL_SSE41;
     90     } else if ((cpu_info[2] & (1<<9)) != 0) {
     91         return SK_CPU_SSE_LEVEL_SSSE3;
     92     } else if ((cpu_info[3] & (1<<26)) != 0) {
     93         return SK_CPU_SSE_LEVEL_SSE2;
     94     } else {
     95         return 0;
     96     }
     97 }
     98 
     99 /* Verify that the requested SIMD level is supported in the build.
    100  * If not, check if the platform supports it.
    101  */
    102 static inline bool supports_simd(int minLevel) {
    103 #if defined(SK_CPU_SSE_LEVEL)
    104     if (minLevel <= SK_CPU_SSE_LEVEL) {
    105         return true;
    106     } else
    107 #endif
    108     {
    109 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
    110         /* For the Android framework we should always know at compile time if the device
    111          * we are building for supports SSSE3.  The one exception to this rule is on the
    112          * emulator where we are compiled without the -mssse3 option (so we have no
    113          * SSSE3 procs) but can be run on a host machine that supports SSSE3
    114          * instructions. So for that particular case we disable our SSSE3 options.
    115          */
    116         return false;
    117 #else
    118         static int gSIMDLevel = get_SIMD_level();
    119         return (minLevel <= gSIMDLevel);
    120 #endif
    121     }
    122 }
    123 
    124 ////////////////////////////////////////////////////////////////////////////////
    125 
    126 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", true, "Use SSE optimized version of high quality image filters");
    127 
    128 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
    129     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    130         procs->fExtraHorizontalReads = 3;
    131         procs->fConvolveVertically = &convolveVertically_SSE2;
    132         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
    133         procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
    134         procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
    135     }
    136 }
    137 
    138 ////////////////////////////////////////////////////////////////////////////////
    139 
    140 void SkBitmapProcState::platformProcs() {
    141     /* Every optimization in the function requires at least SSE2 */
    142     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    143         return;
    144     }
    145 
    146     /* Check fSampleProc32 */
    147     if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    148         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    149             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    150         } else {
    151             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    152         }
    153     } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    154         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    155             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    156         }
    157     } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    158         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    159             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    160         } else {
    161             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    162         }
    163     } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    164         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    165             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    166         }
    167     }
    168 
    169     /* Check fSampleProc16 */
    170     if (fSampleProc16 == S32_D16_filter_DX) {
    171         fSampleProc16 = S32_D16_filter_DX_SSE2;
    172     }
    173 
    174     /* Check fMatrixProc */
    175     if (fMatrixProc == ClampX_ClampY_filter_scale) {
    176         fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    177     } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    178         fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    179     } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
    180         fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    181     } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    182         fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    183     }
    184 
    185     /* Check fShaderProc32 */
    186     if (c_hqfilter_sse) {
    187         if (fShaderProc32 == highQualityFilter32) {
    188             fShaderProc32 = highQualityFilter_SSE2;
    189         }
    190     }
    191 }
    192 
    193 ////////////////////////////////////////////////////////////////////////////////
    194 
    195 static SkBlitRow::Proc platform_16_procs[] = {
    196     S32_D565_Opaque_SSE2,               // S32_D565_Opaque
    197     NULL,                               // S32_D565_Blend
    198     S32A_D565_Opaque_SSE2,              // S32A_D565_Opaque
    199     NULL,                               // S32A_D565_Blend
    200     S32_D565_Opaque_Dither_SSE2,        // S32_D565_Opaque_Dither
    201     NULL,                               // S32_D565_Blend_Dither
    202     S32A_D565_Opaque_Dither_SSE2,       // S32A_D565_Opaque_Dither
    203     NULL,                               // S32A_D565_Blend_Dither
    204 };
    205 
    206 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    207     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    208         return platform_16_procs[flags];
    209     } else {
    210         return NULL;
    211     }
    212 }
    213 
    214 static SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
    215     NULL,                               // S32_Opaque,
    216     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    217     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    218     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    219 };
    220 
    221 #if defined(SK_ATT_ASM_SUPPORTED)
    222 static SkBlitRow::Proc32 platform_32_procs_SSE4[] = {
    223     NULL,                               // S32_Opaque,
    224     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    225     S32A_Opaque_BlitRow32_SSE4_asm,     // S32A_Opaque
    226     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    227 };
    228 #endif
    229 
    230 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    231 #if defined(SK_ATT_ASM_SUPPORTED)
    232     if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
    233         return platform_32_procs_SSE4[flags];
    234     } else
    235 #endif
    236     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    237         return platform_32_procs_SSE2[flags];
    238     } else {
    239         return NULL;
    240     }
    241 }
    242 
    243 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    244     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    245         return Color32_SSE2;
    246     } else {
    247         return NULL;
    248     }
    249 }
    250 
    251 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
    252 
    253 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
    254 /* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
    255     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    256         return ColorRect32_SSE2;
    257     } else {
    258         return NULL;
    259     }
    260 */
    261     return NULL;
    262 }
    263 
    264 ////////////////////////////////////////////////////////////////////////////////
    265 
    266 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
    267                                                      SkMask::Format maskFormat,
    268                                                      SkColor color) {
    269     if (SkMask::kA8_Format != maskFormat) {
    270         return NULL;
    271     }
    272 
    273     ColorProc proc = NULL;
    274     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    275         switch (dstCT) {
    276             case kN32_SkColorType:
    277                 // The SSE2 version is not (yet) faster for black, so we check
    278                 // for that.
    279                 if (SK_ColorBLACK != color) {
    280                     proc = SkARGB32_A8_BlitMask_SSE2;
    281                 }
    282                 break;
    283             default:
    284                 break;
    285         }
    286     }
    287     return proc;
    288 }
    289 
    290 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    291     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    292         if (isOpaque) {
    293             return SkBlitLCD16OpaqueRow_SSE2;
    294         } else {
    295             return SkBlitLCD16Row_SSE2;
    296         }
    297     } else {
    298         return NULL;
    299     }
    300 
    301 }
    302 
    303 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
    304     return NULL;
    305 }
    306 
    307 ////////////////////////////////////////////////////////////////////////////////
    308 
    309 SkMemset16Proc SkMemset16GetPlatformProc() {
    310     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    311         return sk_memset16_SSE2;
    312     } else {
    313         return NULL;
    314     }
    315 }
    316 
    317 SkMemset32Proc SkMemset32GetPlatformProc() {
    318     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    319         return sk_memset32_SSE2;
    320     } else {
    321         return NULL;
    322     }
    323 }
    324 
    325 SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
    326     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    327         return sk_memcpy32_SSE2;
    328     } else {
    329         return NULL;
    330     }
    331 }
    332 
    333 ////////////////////////////////////////////////////////////////////////////////
    334 
    335 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
    336     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    337         return NULL;
    338     }
    339     switch (type) {
    340         case kDilateX_SkMorphologyProcType:
    341             return SkDilateX_SSE2;
    342         case kDilateY_SkMorphologyProcType:
    343             return SkDilateY_SSE2;
    344         case kErodeX_SkMorphologyProcType:
    345             return SkErodeX_SSE2;
    346         case kErodeY_SkMorphologyProcType:
    347             return SkErodeY_SSE2;
    348         default:
    349             return NULL;
    350     }
    351 }
    352 
    353 ////////////////////////////////////////////////////////////////////////////////
    354 
    355 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
    356                                SkBoxBlurProc* boxBlurY,
    357                                SkBoxBlurProc* boxBlurXY,
    358                                SkBoxBlurProc* boxBlurYX) {
    359 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
    360     return false;
    361 #else
    362     if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
    363         return SkBoxBlurGetPlatformProcs_SSE4(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
    364     }
    365     else if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    366         return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
    367     }
    368     return false;
    369 #endif
    370 }
    371 
    372 ////////////////////////////////////////////////////////////////////////////////
    373 
    374 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
    375                                                                 SkXfermode::Mode mode);
    376 
    377 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
    378                                                     SkXfermode::Mode mode);
    379 
    380 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
    381                                                     SkXfermode::Mode mode) {
    382     return NULL;
    383 }
    384 
    385 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
    386                                                SkXfermode::Mode mode);
    387 
    388 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
    389                                                SkXfermode::Mode mode) {
    390     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    391         return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
    392     } else {
    393         return SkPlatformXfermodeFactory_impl(rec, mode);
    394     }
    395 }
    396 
    397 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
    398 
    399 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
    400     return NULL;
    401 }
    402