Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapFilter_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSE2.h"
     10 #include "SkBitmapProcState_opts_SSSE3.h"
     11 #include "SkBitmapScaler.h"
     12 #include "SkBlitMask.h"
     13 #include "SkBlitRow.h"
     14 #include "SkBlitRow_opts_SSE2.h"
     15 #include "SkBlitRow_opts_SSE4.h"
     16 #include "SkOncePtr.h"
     17 #include "SkRTConf.h"
     18 
     19 #if defined(_MSC_VER) && defined(_WIN64)
     20 #include <intrin.h>
     21 #endif
     22 
     23 /* This file must *not* be compiled with -msse or any other optional SIMD
     24    extension, otherwise gcc may generate SIMD instructions even for scalar ops
     25    (and thus give an invalid instruction on Pentium3 on the code below).
     26    For example, only files named *_SSE2.cpp in this directory should be
     27    compiled with -msse2 or higher. */
     28 
     29 
     30 /* Function to get the CPU SSE-level in runtime, for different compilers. */
     31 #ifdef _MSC_VER
     32 static inline void getcpuid(int info_type, int info[4]) {
     33 #if defined(_WIN64)
     34     __cpuid(info, info_type);
     35 #else
     36     __asm {
     37         mov    eax, [info_type]
     38         cpuid
     39         mov    edi, [info]
     40         mov    [edi], eax
     41         mov    [edi+4], ebx
     42         mov    [edi+8], ecx
     43         mov    [edi+12], edx
     44     }
     45 #endif
     46 }
     47 #elif defined(__x86_64__)
     48 static inline void getcpuid(int info_type, int info[4]) {
     49     asm volatile (
     50         "cpuid \n\t"
     51         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     52         : "a"(info_type)
     53     );
     54 }
     55 #else
     56 static inline void getcpuid(int info_type, int info[4]) {
     57     // We save and restore ebx, so this code can be compatible with -fPIC
     58     asm volatile (
     59         "pushl %%ebx      \n\t"
     60         "cpuid            \n\t"
     61         "movl %%ebx, %1   \n\t"
     62         "popl %%ebx       \n\t"
     63         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     64         : "a"(info_type)
     65     );
     66 }
     67 #endif
     68 
     69 ////////////////////////////////////////////////////////////////////////////////
     70 
     71 /* Fetch the SIMD level directly from the CPU, at run-time.
     72  * Only checks the levels needed by the optimizations in this file.
     73  */
     74 static int* get_SIMD_level() {
     75     int cpu_info[4] = { 0, 0, 0, 0 };
     76     getcpuid(1, cpu_info);
     77 
     78     int* level = new int;
     79 
     80     if ((cpu_info[2] & (1<<20)) != 0) {
     81         *level = SK_CPU_SSE_LEVEL_SSE42;
     82     } else if ((cpu_info[2] & (1<<19)) != 0) {
     83         *level = SK_CPU_SSE_LEVEL_SSE41;
     84     } else if ((cpu_info[2] & (1<<9)) != 0) {
     85         *level = SK_CPU_SSE_LEVEL_SSSE3;
     86     } else if ((cpu_info[3] & (1<<26)) != 0) {
     87         *level = SK_CPU_SSE_LEVEL_SSE2;
     88     } else {
     89         *level = 0;
     90     }
     91     return level;
     92 }
     93 
     94 SK_DECLARE_STATIC_ONCE_PTR(int, gSIMDLevel);
     95 
     96 /* Verify that the requested SIMD level is supported in the build.
     97  * If not, check if the platform supports it.
     98  */
     99 static inline bool supports_simd(int minLevel) {
    100 #if defined(SK_CPU_SSE_LEVEL)
    101     if (minLevel <= SK_CPU_SSE_LEVEL) {
    102         return true;
    103     } else
    104 #endif
    105     {
    106 #if defined(SK_BUILD_FOR_ANDROID_FRAMEWORK)
    107         /* For the Android framework we should always know at compile time if the device
    108          * we are building for supports SSSE3.  The one exception to this rule is on the
    109          * emulator where we are compiled without the -mssse3 option (so we have no
    110          * SSSE3 procs) but can be run on a host machine that supports SSSE3
    111          * instructions. So for that particular case we disable our SSSE3 options.
    112          */
    113         return false;
    114 #else
    115         return minLevel <= *gSIMDLevel.get(get_SIMD_level);
    116 #endif
    117     }
    118 }
    119 
    120 ////////////////////////////////////////////////////////////////////////////////
    121 
    122 void SkBitmapScaler::PlatformConvolutionProcs(SkConvolutionProcs* procs) {
    123     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    124         procs->fExtraHorizontalReads = 3;
    125         procs->fConvolveVertically = &convolveVertically_SSE2;
    126         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
    127         procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
    128         procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
    129     }
    130 }
    131 
    132 ////////////////////////////////////////////////////////////////////////////////
    133 
    134 void SkBitmapProcState::platformProcs() {
    135     /* Every optimization in the function requires at least SSE2 */
    136     if (!supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    137         return;
    138     }
    139     const bool ssse3 = supports_simd(SK_CPU_SSE_LEVEL_SSSE3);
    140 
    141     /* Check fSampleProc32 */
    142     if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    143         if (ssse3) {
    144             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    145         } else {
    146             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    147         }
    148     } else if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    149         if (ssse3) {
    150             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    151         }
    152     } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    153         if (ssse3) {
    154             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    155         } else {
    156             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    157         }
    158     } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    159         if (ssse3) {
    160             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    161         }
    162     }
    163 
    164     /* Check fMatrixProc */
    165     if (fMatrixProc == ClampX_ClampY_filter_scale) {
    166         fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    167     } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    168         fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    169     } else if (fMatrixProc == ClampX_ClampY_filter_affine) {
    170         fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    171     } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    172         fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    173     }
    174 }
    175 
    176 ////////////////////////////////////////////////////////////////////////////////
    177 
    178 static const SkBlitRow::Proc16 platform_16_procs[] = {
    179     S32_D565_Opaque_SSE2,               // S32_D565_Opaque
    180     nullptr,                               // S32_D565_Blend
    181     S32A_D565_Opaque_SSE2,              // S32A_D565_Opaque
    182     nullptr,                               // S32A_D565_Blend
    183     S32_D565_Opaque_Dither_SSE2,        // S32_D565_Opaque_Dither
    184     nullptr,                               // S32_D565_Blend_Dither
    185     S32A_D565_Opaque_Dither_SSE2,       // S32A_D565_Opaque_Dither
    186     nullptr,                               // S32A_D565_Blend_Dither
    187 };
    188 
    189 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) {
    190     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    191         return platform_16_procs[flags];
    192     } else {
    193         return nullptr;
    194     }
    195 }
    196 
    197 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = {
    198     Color32A_D565_SSE2,                 // Color32A_D565,
    199     nullptr,                               // Color32A_D565_Dither
    200 };
    201 
    202 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) {
    203 /* If you're thinking about writing an SSE4 version of this, do check it's
    204  * actually faster on Atom. Our original SSE4 version was slower than this
    205  * SSE2 version on Silvermont, and only marginally faster on a Core i7,
    206  * mainly due to the MULLD timings.
    207  */
    208     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    209         return platform_565_colorprocs_SSE2[flags];
    210     } else {
    211         return nullptr;
    212     }
    213 }
    214 
    215 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = {
    216     nullptr,                               // S32_Opaque,
    217     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    218     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    219     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    220 };
    221 
    222 static const SkBlitRow::Proc32 platform_32_procs_SSE4[] = {
    223     nullptr,                               // S32_Opaque,
    224     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    225     S32A_Opaque_BlitRow32_SSE4,         // S32A_Opaque
    226     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    227 };
    228 
    229 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    230     if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) {
    231         return platform_32_procs_SSE4[flags];
    232     } else
    233     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    234         return platform_32_procs_SSE2[flags];
    235     } else {
    236         return nullptr;
    237     }
    238 }
    239 
    240 ////////////////////////////////////////////////////////////////////////////////
    241 
    242 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    243     if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) {
    244         if (isOpaque) {
    245             return SkBlitLCD16OpaqueRow_SSE2;
    246         } else {
    247             return SkBlitLCD16Row_SSE2;
    248         }
    249     } else {
    250         return nullptr;
    251     }
    252 
    253 }
    254 
    255 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkColorType, SkMask::Format, RowFlags) {
    256     return nullptr;
    257 }
    258