Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapFilter_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSE2.h"
     10 #include "SkBitmapProcState_opts_SSSE3.h"
     11 #include "SkBlitMask.h"
     12 #include "SkBlitRect_opts_SSE2.h"
     13 #include "SkBlitRow.h"
     14 #include "SkBlitRow_opts_SSE2.h"
     15 #include "SkBlurImage_opts_SSE2.h"
     16 #include "SkMorphology_opts.h"
     17 #include "SkMorphology_opts_SSE2.h"
     18 #include "SkRTConf.h"
     19 #include "SkUtils.h"
     20 #include "SkUtils_opts_SSE2.h"
     21 #include "SkXfermode.h"
     22 #include "SkXfermode_proccoeff.h"
     23 
     24 #if defined(_MSC_VER) && defined(_WIN64)
     25 #include <intrin.h>
     26 #endif
     27 
     28 /* This file must *not* be compiled with -msse or any other optional SIMD
     29    extension, otherwise gcc may generate SIMD instructions even for scalar ops
     30    (and thus give an invalid instruction on Pentium3 on the code below).
     31    For example, only files named *_SSE2.cpp in this directory should be
     32    compiled with -msse2 or higher. */
     33 
     34 
     35 /* Function to get the CPU SSE-level in runtime, for different compilers. */
     36 #ifdef _MSC_VER
     37 static inline void getcpuid(int info_type, int info[4]) {
     38 #if defined(_WIN64)
     39     __cpuid(info, info_type);
     40 #else
     41     __asm {
     42         mov    eax, [info_type]
     43         cpuid
     44         mov    edi, [info]
     45         mov    [edi], eax
     46         mov    [edi+4], ebx
     47         mov    [edi+8], ecx
     48         mov    [edi+12], edx
     49     }
     50 #endif
     51 }
     52 #elif defined(__x86_64__)
     53 static inline void getcpuid(int info_type, int info[4]) {
     54     asm volatile (
     55         "cpuid \n\t"
     56         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     57         : "a"(info_type)
     58     );
     59 }
     60 #else
     61 static inline void getcpuid(int info_type, int info[4]) {
     62     // We save and restore ebx, so this code can be compatible with -fPIC
     63     asm volatile (
     64         "pushl %%ebx      \n\t"
     65         "cpuid            \n\t"
     66         "movl %%ebx, %1   \n\t"
     67         "popl %%ebx       \n\t"
     68         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     69         : "a"(info_type)
     70     );
     71 }
     72 #endif
     73 
     74 ////////////////////////////////////////////////////////////////////////////////
     75 
     76 /* Fetch the SIMD level directly from the CPU, at run-time.
     77  * Only checks the levels needed by the optimizations in this file.
     78  */
     79 static int get_SIMD_level() {
     80     int cpu_info[4] = { 0 };
     81 
     82     getcpuid(1, cpu_info);
     83     if ((cpu_info[2] & (1<<20)) != 0) {
     84         return SK_CPU_SSE_LEVEL_SSE42;
     85     } else if ((cpu_info[2] & (1<<9)) != 0) {
     86         return SK_CPU_SSE_LEVEL_SSSE3;
     87     } else if ((cpu_info[3] & (1<<26)) != 0) {
     88         return SK_CPU_SSE_LEVEL_SSE2;
     89     } else {
     90         return 0;
     91     }
     92 }
     93 
     94 /* Verify that the requested SIMD level is supported in the build.
     95  * If not, check if the platform supports it.
     96  */
     97 static inline bool supports_simd(int minLevel) {
     98 #if defined(SK_CPU_SSE_LEVEL)
     99     if (minLevel <= SK_CPU_SSE_LEVEL) {
    100         return true;
    101     } else
    102 #endif
    103     {
    104 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
    105         /* For the Android framework we should always know at compile time if the device
    106          * we are building for supports SSSE3.  The one exception to this rule is on the
    107          * emulator where we are compiled without the -mssse3 option (so we have no
    108          * SSSE3 procs) but can be run on a host machine that supports SSSE3
    109          * instructions. So for that particular case we disable our SSSE3 options.
    110          */
    111         return false;
    112 #else
    113         static int gSIMDLevel = get_SIMD_level();
    114         return (minLevel <= gSIMDLevel);
    115 #endif
    116     }
    117 }
    118 
    119 ////////////////////////////////////////////////////////////////////////////////
    120 
    121 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
    122 
    123 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
    124     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    125         procs->fExtraHorizontalReads = 3;
    126         procs->fConvolveVertically = &convolveVertically_SSE2;
    127         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
    128         procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
    129         procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
    130     }
    131 }
    132 
    133 ////////////////////////////////////////////////////////////////////////////////
    134 
    135 void SkBitmapProcState::platformProcs() {
    136     /* Every optimization in the function requires at least SSE2 */
    137     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    138         return;
    139     }
    140 
    141     /* Check fSampleProc32 */
    142     if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    143         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    144             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    145         } else {
    146             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    147         }
    148     } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    149         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    150             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    151         }
    152     } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    153         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    154             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    155         } else {
    156             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    157         }
    158     } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    159         if (supports_simd(SK_CPU_SSE_LEVEL_SSSE3)) {
    160             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    161         }
    162     }
    163 
    164     /* Check fSampleProc16 */
    165     if (fSampleProc16 == S32_D16_filter_DX) {
    166         fSampleProc16 = S32_D16_filter_DX_SSE2;
    167     }
    168 
    169     /* Check fMatrixProc */
    170     if (fMatrixProc == ClampX_ClampY_filter_scale) {
    171         fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    172     } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    173         fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    174     } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
    175         fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    176     } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    177         fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    178     }
    179 
    180     /* Check fShaderProc32 */
    181     if (c_hqfilter_sse) {
    182         if (fShaderProc32 == highQualityFilter32) {
    183             fShaderProc32 = highQualityFilter_SSE2;
    184         }
    185     }
    186 }
    187 
    188 ////////////////////////////////////////////////////////////////////////////////
    189 
    190 static SkBlitRow::Proc platform_16_procs[] = {
    191     S32_D565_Opaque_SSE2,               // S32_D565_Opaque
    192     NULL,                               // S32_D565_Blend
    193     S32A_D565_Opaque_SSE2,              // S32A_D565_Opaque
    194     NULL,                               // S32A_D565_Blend
    195     S32_D565_Opaque_Dither_SSE2,        // S32_D565_Opaque_Dither
    196     NULL,                               // S32_D565_Blend_Dither
    197     S32A_D565_Opaque_Dither_SSE2,       // S32A_D565_Opaque_Dither
    198     NULL,                               // S32A_D565_Blend_Dither
    199 };
    200 
    201 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    202     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    203         return platform_16_procs[flags];
    204     } else {
    205         return NULL;
    206     }
    207 }
    208 
    209 static SkBlitRow::Proc32 platform_32_procs[] = {
    210     NULL,                               // S32_Opaque,
    211     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    212     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    213     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    214 };
    215 
    216 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    217     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    218         return platform_32_procs[flags];
    219     } else {
    220         return NULL;
    221     }
    222 }
    223 
    224 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    225     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    226         return Color32_SSE2;
    227     } else {
    228         return NULL;
    229     }
    230 }
    231 
    232 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
    233 
    234 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
    235 /* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled.
    236     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    237         return ColorRect32_SSE2;
    238     } else {
    239         return NULL;
    240     }
    241 */
    242     return NULL;
    243 }
    244 
    245 ////////////////////////////////////////////////////////////////////////////////
    246 
    247 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkColorType dstCT,
    248                                                      SkMask::Format maskFormat,
    249                                                      SkColor color) {
    250     if (SkMask::kA8_Format != maskFormat) {
    251         return NULL;
    252     }
    253 
    254     ColorProc proc = NULL;
    255     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    256         switch (dstCT) {
    257             case kN32_SkColorType:
    258                 // The SSE2 version is not (yet) faster for black, so we check
    259                 // for that.
    260                 if (SK_ColorBLACK != color) {
    261                     proc = SkARGB32_A8_BlitMask_SSE2;
    262                 }
    263                 break;
    264             default:
    265                 break;
    266         }
    267     }
    268     return proc;
    269 }
    270 
    271 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    272     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    273         if (isOpaque) {
    274             return SkBlitLCD16OpaqueRow_SSE2;
    275         } else {
    276             return SkBlitLCD16Row_SSE2;
    277         }
    278     } else {
    279         return NULL;
    280     }
    281 
    282 }
    283 
    284 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
    285     return NULL;
    286 }
    287 
    288 ////////////////////////////////////////////////////////////////////////////////
    289 
    290 SkMemset16Proc SkMemset16GetPlatformProc() {
    291     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    292         return sk_memset16_SSE2;
    293     } else {
    294         return NULL;
    295     }
    296 }
    297 
    298 SkMemset32Proc SkMemset32GetPlatformProc() {
    299     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    300         return sk_memset32_SSE2;
    301     } else {
    302         return NULL;
    303     }
    304 }
    305 
    306 SkMemcpy32Proc SkMemcpy32GetPlatformProc() {
    307     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    308         return sk_memcpy32_SSE2;
    309     } else {
    310         return NULL;
    311     }
    312 }
    313 
    314 ////////////////////////////////////////////////////////////////////////////////
    315 
    316 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
    317     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    318         return NULL;
    319     }
    320     switch (type) {
    321         case kDilateX_SkMorphologyProcType:
    322             return SkDilateX_SSE2;
    323         case kDilateY_SkMorphologyProcType:
    324             return SkDilateY_SSE2;
    325         case kErodeX_SkMorphologyProcType:
    326             return SkErodeX_SSE2;
    327         case kErodeY_SkMorphologyProcType:
    328             return SkErodeY_SSE2;
    329         default:
    330             return NULL;
    331     }
    332 }
    333 
    334 ////////////////////////////////////////////////////////////////////////////////
    335 
    336 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
    337                                SkBoxBlurProc* boxBlurY,
    338                                SkBoxBlurProc* boxBlurXY,
    339                                SkBoxBlurProc* boxBlurYX) {
    340 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
    341     return false;
    342 #else
    343     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    344         return false;
    345     }
    346     return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
    347 #endif
    348 }
    349 
    350 ////////////////////////////////////////////////////////////////////////////////
    351 
    352 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff& rec,
    353                                                                 SkXfermode::Mode mode);
    354 
    355 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
    356                                                     SkXfermode::Mode mode);
    357 
    358 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec,
    359                                                     SkXfermode::Mode mode) {
    360     return NULL;
    361 }
    362 
    363 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
    364                                                SkXfermode::Mode mode);
    365 
    366 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec,
    367                                                SkXfermode::Mode mode) {
    368     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    369         return SkPlatformXfermodeFactory_impl_SSE2(rec, mode);
    370     } else {
    371         return SkPlatformXfermodeFactory_impl(rec, mode);
    372     }
    373 }
    374 
    375 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode);
    376 
    377 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) {
    378     return NULL;
    379 }
    380