Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapProcState_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSSE3.h"
     10 #include "SkBitmapFilter_opts_SSE2.h"
     11 #include "SkBlitMask.h"
     12 #include "SkBlitRow.h"
     13 #include "SkBlitRect_opts_SSE2.h"
     14 #include "SkBlitRow_opts_SSE2.h"
     15 #include "SkBlurImage_opts_SSE2.h"
     16 #include "SkUtils_opts_SSE2.h"
     17 #include "SkUtils.h"
     18 #include "SkMorphology_opts.h"
     19 #include "SkMorphology_opts_SSE2.h"
     20 
     21 #include "SkRTConf.h"
     22 
     23 #if defined(_MSC_VER) && defined(_WIN64)
     24 #include <intrin.h>
     25 #endif
     26 
/* This file must *not* be compiled with -msse or -msse2; otherwise gcc may
   emit SSE2 instructions even for scalar operations, which would fault with
   an invalid instruction on a Pentium 3 while running the code below. Only
   files named *_SSE2.cpp in this directory should be compiled with -msse2. */
     31 
     32 
     33 #ifdef _MSC_VER
     34 static inline void getcpuid(int info_type, int info[4]) {
     35 #if defined(_WIN64)
     36     __cpuid(info, info_type);
     37 #else
     38     __asm {
     39         mov    eax, [info_type]
     40         cpuid
     41         mov    edi, [info]
     42         mov    [edi], eax
     43         mov    [edi+4], ebx
     44         mov    [edi+8], ecx
     45         mov    [edi+12], edx
     46     }
     47 #endif
     48 }
     49 #else
     50 #if defined(__x86_64__)
     51 static inline void getcpuid(int info_type, int info[4]) {
     52     asm volatile (
     53         "cpuid \n\t"
     54         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     55         : "a"(info_type)
     56     );
     57 }
     58 #else
     59 static inline void getcpuid(int info_type, int info[4]) {
     60     // We save and restore ebx, so this code can be compatible with -fPIC
     61     asm volatile (
     62         "pushl %%ebx      \n\t"
     63         "cpuid            \n\t"
     64         "movl %%ebx, %1   \n\t"
     65         "popl %%ebx       \n\t"
     66         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     67         : "a"(info_type)
     68     );
     69 }
     70 #endif
     71 #endif
     72 
     73 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
     74 /* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
     75 static inline bool hasSSE2() {
     76     return true;
     77 }
     78 #else
     79 
     80 static inline bool hasSSE2() {
     81     int cpu_info[4] = { 0 };
     82     getcpuid(1, cpu_info);
     83     return (cpu_info[3] & (1<<26)) != 0;
     84 }
     85 #endif
     86 
     87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
     88 /* If we know SSSE3 is supported at compile time, don't even bother checking. */
     89 static inline bool hasSSSE3() {
     90     return true;
     91 }
     92 #else
     93 
     94 static inline bool hasSSSE3() {
     95     int cpu_info[4] = { 0 };
     96     getcpuid(1, cpu_info);
     97     return (cpu_info[2] & 0x200) != 0;
     98 }
     99 #endif
    100 
    101 static bool cachedHasSSE2() {
    102     static bool gHasSSE2 = hasSSE2();
    103     return gHasSSE2;
    104 }
    105 
    106 static bool cachedHasSSSE3() {
    107     static bool gHasSSSE3 = hasSSSE3();
    108     return gHasSSSE3;
    109 }
    110 
// Declares the configuration value c_hqfilter_sse under the key
// "bitmap.filter.highQualitySSE". SkBitmapProcState::platformProcs() checks it
// before swapping highQualityFilter32 for highQualityFilter_SSE2.
// NOTE(review): the `false` argument is presumably the default value --
// confirm against SkRTConf.h.
SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
    112 
    113 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) {
    114     if (cachedHasSSE2()) {
    115         procs->fExtraHorizontalReads = 3;
    116         procs->fConvolveVertically = &convolveVertically_SSE2;
    117         procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
    118         procs->fConvolveHorizontally = &convolveHorizontally_SSE2;
    119         procs->fApplySIMDPadding = &applySIMDPadding_SSE2;
    120     }
    121 }
    122 
    123 void SkBitmapProcState::platformProcs() {
    124     if (cachedHasSSSE3()) {
    125         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    126             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    127         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    128             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    129         }
    130 
    131         if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    132             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    133         } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    134             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    135         }
    136     } else if (cachedHasSSE2()) {
    137         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    138             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    139         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    140             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    141         }
    142 
    143         if (fSampleProc16 == S32_D16_filter_DX) {
    144             fSampleProc16 = S32_D16_filter_DX_SSE2;
    145         }
    146     }
    147 
    148     if (cachedHasSSSE3() || cachedHasSSE2()) {
    149         if (fMatrixProc == ClampX_ClampY_filter_scale) {
    150             fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    151         } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    152             fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    153         }
    154 
    155         if (fMatrixProc == ClampX_ClampY_filter_affine) {
    156             fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    157         } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    158             fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    159         }
    160         if (c_hqfilter_sse) {
    161             if (fShaderProc32 == highQualityFilter32) {
    162                 fShaderProc32 = highQualityFilter_SSE2;
    163             }
    164         }
    165     }
    166 }
    167 
    168 static SkBlitRow::Proc32 platform_32_procs[] = {
    169     NULL,                               // S32_Opaque,
    170     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    171     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    172     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    173 };
    174 
    175 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    176     return NULL;
    177 }
    178 
    179 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    180     if (cachedHasSSE2()) {
    181         return Color32_SSE2;
    182     } else {
    183         return NULL;
    184     }
    185 }
    186 
    187 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    188     if (cachedHasSSE2()) {
    189         return platform_32_procs[flags];
    190     } else {
    191         return NULL;
    192     }
    193 }
    194 
    195 
    196 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
    197                                                      SkMask::Format maskFormat,
    198                                                      SkColor color) {
    199     if (SkMask::kA8_Format != maskFormat) {
    200         return NULL;
    201     }
    202 
    203     ColorProc proc = NULL;
    204     if (cachedHasSSE2()) {
    205         switch (dstConfig) {
    206             case SkBitmap::kARGB_8888_Config:
    207                 // The SSE2 version is not (yet) faster for black, so we check
    208                 // for that.
    209                 if (SK_ColorBLACK != color) {
    210                     proc = SkARGB32_A8_BlitMask_SSE2;
    211                 }
    212                 break;
    213             default:
    214                 break;
    215         }
    216     }
    217     return proc;
    218 }
    219 
    220 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    221     if (cachedHasSSE2()) {
    222         if (isOpaque) {
    223             return SkBlitLCD16OpaqueRow_SSE2;
    224         } else {
    225             return SkBlitLCD16Row_SSE2;
    226         }
    227     } else {
    228         return NULL;
    229     }
    230 
    231 }
    232 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
    233                                                  SkMask::Format maskFormat,
    234                                                  RowFlags flags) {
    235     return NULL;
    236 }
    237 
    238 SkMemset16Proc SkMemset16GetPlatformProc() {
    239     if (cachedHasSSE2()) {
    240         return sk_memset16_SSE2;
    241     } else {
    242         return NULL;
    243     }
    244 }
    245 
    246 SkMemset32Proc SkMemset32GetPlatformProc() {
    247     if (cachedHasSSE2()) {
    248         return sk_memset32_SSE2;
    249     } else {
    250         return NULL;
    251     }
    252 }
    253 
    254 SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type) {
    255     if (!cachedHasSSE2()) {
    256         return NULL;
    257     }
    258     switch (type) {
    259         case kDilateX_SkMorphologyProcType:
    260             return SkDilateX_SSE2;
    261         case kDilateY_SkMorphologyProcType:
    262             return SkDilateY_SSE2;
    263         case kErodeX_SkMorphologyProcType:
    264             return SkErodeX_SSE2;
    265         case kErodeY_SkMorphologyProcType:
    266             return SkErodeY_SSE2;
    267         default:
    268             return NULL;
    269     }
    270 }
    271 
    272 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX,
    273                                SkBoxBlurProc* boxBlurY,
    274                                SkBoxBlurProc* boxBlurXY,
    275                                SkBoxBlurProc* boxBlurYX) {
    276 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION
    277     return false;
    278 #else
    279     if (!cachedHasSSE2()) {
    280         return false;
    281     }
    282     return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX);
    283 #endif
    284 }
    285 
    286 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
    287 
    288 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
    289     if (cachedHasSSE2()) {
    290         return ColorRect32_SSE2;
    291     } else {
    292         return NULL;
    293     }
    294 }
    295