Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapProcState_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSSE3.h"
     10 #include "SkBitmapFilter_opts_SSE2.h"
     11 #include "SkBlitMask.h"
     12 #include "SkBlitRow.h"
     13 #include "SkBlitRect_opts_SSE2.h"
     14 #include "SkBlitRow_opts_SSE2.h"
     15 #include "SkUtils_opts_SSE2.h"
     16 #include "SkUtils.h"
     17 
     18 #include "SkRTConf.h"
     19 
     20 #if defined(_MSC_VER) && defined(_WIN64)
     21 #include <intrin.h>
     22 #endif
     23 
     24 /* This file must *not* be compiled with -msse or -msse2, otherwise
     25    gcc may generate sse2 even for scalar ops (and thus give an invalid
     26    instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
     27    in this directory should be compiled with -msse2. */
     28 
     29 
     30 #ifdef _MSC_VER
     31 static inline void getcpuid(int info_type, int info[4]) {
     32 #if defined(_WIN64)
     33     __cpuid(info, info_type);
     34 #else
     35     __asm {
     36         mov    eax, [info_type]
     37         cpuid
     38         mov    edi, [info]
     39         mov    [edi], eax
     40         mov    [edi+4], ebx
     41         mov    [edi+8], ecx
     42         mov    [edi+12], edx
     43     }
     44 #endif
     45 }
     46 #else
     47 #if defined(__x86_64__)
     48 static inline void getcpuid(int info_type, int info[4]) {
     49     asm volatile (
     50         "cpuid \n\t"
     51         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     52         : "a"(info_type)
     53     );
     54 }
     55 #else
     56 static inline void getcpuid(int info_type, int info[4]) {
     57     // We save and restore ebx, so this code can be compatible with -fPIC
     58     asm volatile (
     59         "pushl %%ebx      \n\t"
     60         "cpuid            \n\t"
     61         "movl %%ebx, %1   \n\t"
     62         "popl %%ebx       \n\t"
     63         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     64         : "a"(info_type)
     65     );
     66 }
     67 #endif
     68 #endif
     69 
     70 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
     71 /* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
     72 static inline bool hasSSE2() {
     73     return true;
     74 }
     75 #else
     76 
     77 static inline bool hasSSE2() {
     78     int cpu_info[4] = { 0 };
     79     getcpuid(1, cpu_info);
     80     return (cpu_info[3] & (1<<26)) != 0;
     81 }
     82 #endif
     83 
     84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
     85 /* If we know SSSE3 is supported at compile time, don't even bother checking. */
     86 static inline bool hasSSSE3() {
     87     return true;
     88 }
     89 #else
     90 
     91 static inline bool hasSSSE3() {
     92     int cpu_info[4] = { 0 };
     93     getcpuid(1, cpu_info);
     94     return (cpu_info[2] & 0x200) != 0;
     95 }
     96 #endif
     97 
     98 static bool cachedHasSSE2() {
     99     static bool gHasSSE2 = hasSSE2();
    100     return gHasSSE2;
    101 }
    102 
    103 static bool cachedHasSSSE3() {
    104     static bool gHasSSSE3 = hasSSSE3();
    105     return gHasSSSE3;
    106 }
    107 
    108 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters");
    109 
    110 void SkBitmapProcState::platformConvolutionProcs() {
    111     if (cachedHasSSE2()) {
    112         fConvolutionProcs->fExtraHorizontalReads = 3;
    113         fConvolutionProcs->fConvolveVertically = &convolveVertically_SSE2;
    114         fConvolutionProcs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2;
    115         fConvolutionProcs->fConvolveHorizontally = &convolveHorizontally_SSE2;
    116         fConvolutionProcs->fApplySIMDPadding = &applySIMDPadding_SSE2;
    117     }
    118 }
    119 
    120 void SkBitmapProcState::platformProcs() {
    121     if (cachedHasSSSE3()) {
    122         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    123             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    124         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    125             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    126         }
    127 
    128         if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    129             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    130         } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    131             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    132         }
    133     } else if (cachedHasSSE2()) {
    134         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    135             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    136         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    137             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    138         }
    139 
    140         if (fSampleProc16 == S32_D16_filter_DX) {
    141             fSampleProc16 = S32_D16_filter_DX_SSE2;
    142         }
    143     }
    144 
    145     if (cachedHasSSSE3() || cachedHasSSE2()) {
    146         if (fMatrixProc == ClampX_ClampY_filter_scale) {
    147             fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    148         } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    149             fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    150         }
    151 
    152         if (fMatrixProc == ClampX_ClampY_filter_affine) {
    153             fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    154         } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    155             fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    156         }
    157         if (c_hqfilter_sse) {
    158             if (fShaderProc32 == highQualityFilter) {
    159                 fShaderProc32 = highQualityFilter_SSE2;
    160             }
    161         }
    162     }
    163 }
    164 
    165 static SkBlitRow::Proc32 platform_32_procs[] = {
    166     NULL,                               // S32_Opaque,
    167     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    168     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    169     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    170 };
    171 
    172 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    173     return NULL;
    174 }
    175 
    176 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    177     if (cachedHasSSE2()) {
    178         return Color32_SSE2;
    179     } else {
    180         return NULL;
    181     }
    182 }
    183 
    184 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    185     if (cachedHasSSE2()) {
    186         return platform_32_procs[flags];
    187     } else {
    188         return NULL;
    189     }
    190 }
    191 
    192 
    193 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
    194                                                      SkMask::Format maskFormat,
    195                                                      SkColor color) {
    196     if (SkMask::kA8_Format != maskFormat) {
    197         return NULL;
    198     }
    199 
    200     ColorProc proc = NULL;
    201     if (cachedHasSSE2()) {
    202         switch (dstConfig) {
    203             case SkBitmap::kARGB_8888_Config:
    204                 // The SSE2 version is not (yet) faster for black, so we check
    205                 // for that.
    206                 if (SK_ColorBLACK != color) {
    207                     proc = SkARGB32_A8_BlitMask_SSE2;
    208                 }
    209                 break;
    210             default:
    211                 break;
    212         }
    213     }
    214     return proc;
    215 }
    216 
    217 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    218     if (cachedHasSSE2()) {
    219         if (isOpaque) {
    220             return SkBlitLCD16OpaqueRow_SSE2;
    221         } else {
    222             return SkBlitLCD16Row_SSE2;
    223         }
    224     } else {
    225         return NULL;
    226     }
    227 
    228 }
    229 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
    230                                                  SkMask::Format maskFormat,
    231                                                  RowFlags flags) {
    232     return NULL;
    233 }
    234 
    235 SkMemset16Proc SkMemset16GetPlatformProc() {
    236     if (cachedHasSSE2()) {
    237         return sk_memset16_SSE2;
    238     } else {
    239         return NULL;
    240     }
    241 }
    242 
    243 SkMemset32Proc SkMemset32GetPlatformProc() {
    244     if (cachedHasSSE2()) {
    245         return sk_memset32_SSE2;
    246     } else {
    247         return NULL;
    248     }
    249 }
    250 
    251 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
    252 
    253 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
    254     if (cachedHasSSE2()) {
    255         return ColorRect32_SSE2;
    256     } else {
    257         return NULL;
    258     }
    259 }
    260