Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #include "SkBitmapProcState_opts_SSE2.h"
      9 #include "SkBitmapProcState_opts_SSSE3.h"
     10 #include "SkBlitMask.h"
     11 #include "SkBlitRow.h"
     12 #include "SkBlitRect_opts_SSE2.h"
     13 #include "SkBlitRow_opts_SSE2.h"
     14 #include "SkUtils_opts_SSE2.h"
     15 #include "SkUtils.h"
     16 
     17 #if defined(_MSC_VER) && defined(_WIN64)
     18 #include <intrin.h>
     19 #endif
     20 
     21 /* This file must *not* be compiled with -msse or -msse2, otherwise
     22    gcc may generate sse2 even for scalar ops (and thus give an invalid
     23    instruction on Pentium3 on the code below).  Only files named *_SSE2.cpp
     24    in this directory should be compiled with -msse2. */
     25 
     26 
     27 #ifdef _MSC_VER
     28 static inline void getcpuid(int info_type, int info[4]) {
     29 #if defined(_WIN64)
     30     __cpuid(info, info_type);
     31 #else
     32     __asm {
     33         mov    eax, [info_type]
     34         cpuid
     35         mov    edi, [info]
     36         mov    [edi], eax
     37         mov    [edi+4], ebx
     38         mov    [edi+8], ecx
     39         mov    [edi+12], edx
     40     }
     41 #endif
     42 }
     43 #else
     44 #if defined(__x86_64__)
     45 static inline void getcpuid(int info_type, int info[4]) {
     46     asm volatile (
     47         "cpuid \n\t"
     48         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     49         : "a"(info_type)
     50     );
     51 }
     52 #else
     53 static inline void getcpuid(int info_type, int info[4]) {
     54     // We save and restore ebx, so this code can be compatible with -fPIC
     55     asm volatile (
     56         "pushl %%ebx      \n\t"
     57         "cpuid            \n\t"
     58         "movl %%ebx, %1   \n\t"
     59         "popl %%ebx       \n\t"
     60         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     61         : "a"(info_type)
     62     );
     63 }
     64 #endif
     65 #endif
     66 
     67 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
     68 /* All x86_64 machines have SSE2, or we know it's supported at compile time,  so don't even bother checking. */
     69 static inline bool hasSSE2() {
     70     return true;
     71 }
     72 #else
     73 
     74 static inline bool hasSSE2() {
     75     int cpu_info[4] = { 0 };
     76     getcpuid(1, cpu_info);
     77     return (cpu_info[3] & (1<<26)) != 0;
     78 }
     79 #endif
     80 
     81 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
     82 /* If we know SSSE3 is supported at compile time, don't even bother checking. */
     83 static inline bool hasSSSE3() {
     84     return true;
     85 }
     86 #else
     87 
     88 static inline bool hasSSSE3() {
     89     int cpu_info[4] = { 0 };
     90     getcpuid(1, cpu_info);
     91     return (cpu_info[2] & 0x200) != 0;
     92 }
     93 #endif
     94 
     95 static bool cachedHasSSE2() {
     96     static bool gHasSSE2 = hasSSE2();
     97     return gHasSSE2;
     98 }
     99 
    100 static bool cachedHasSSSE3() {
    101     static bool gHasSSSE3 = hasSSSE3();
    102     return gHasSSSE3;
    103 }
    104 
    105 void SkBitmapProcState::platformProcs() {
    106     if (cachedHasSSSE3()) {
    107 #if !defined(SK_BUILD_FOR_ANDROID)
    108         // Disable SSSE3 optimization for Android x86
    109         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    110             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
    111         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    112             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
    113         }
    114 
    115         if (fSampleProc32 == S32_opaque_D32_filter_DXDY) {
    116             fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3;
    117         } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) {
    118             fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3;
    119         }
    120 #endif
    121     } else if (cachedHasSSE2()) {
    122         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
    123             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
    124         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
    125             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
    126         }
    127 
    128         if (fSampleProc16 == S32_D16_filter_DX) {
    129             fSampleProc16 = S32_D16_filter_DX_SSE2;
    130         }
    131     }
    132 
    133     if (cachedHasSSSE3() || cachedHasSSE2()) {
    134         if (fMatrixProc == ClampX_ClampY_filter_scale) {
    135             fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    136         } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    137             fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    138         }
    139 
    140         if (fMatrixProc == ClampX_ClampY_filter_affine) {
    141             fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    142         } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    143             fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    144         }
    145     }
    146 }
    147 
    148 static SkBlitRow::Proc32 platform_32_procs[] = {
    149     NULL,                               // S32_Opaque,
    150     S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    151     S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    152     S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
    153 };
    154 
    155 SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
    156     return NULL;
    157 }
    158 
    159 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    160     return NULL;
    161 }
    162 
    163 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    164     if (cachedHasSSE2()) {
    165         return Color32_SSE2;
    166     } else {
    167         return NULL;
    168     }
    169 }
    170 
    171 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    172     if (cachedHasSSE2()) {
    173         return platform_32_procs[flags];
    174     } else {
    175         return NULL;
    176     }
    177 }
    178 
    179 
    180 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
    181                                                      SkMask::Format maskFormat,
    182                                                      SkColor color) {
    183     if (SkMask::kA8_Format != maskFormat) {
    184         return NULL;
    185     }
    186 
    187     ColorProc proc = NULL;
    188     if (cachedHasSSE2()) {
    189         switch (dstConfig) {
    190             case SkBitmap::kARGB_8888_Config:
    191                 // The SSE2 version is not (yet) faster for black, so we check
    192                 // for that.
    193                 if (SK_ColorBLACK != color) {
    194                     proc = SkARGB32_A8_BlitMask_SSE2;
    195                 }
    196                 break;
    197             default:
    198                 break;
    199         }
    200     }
    201     return proc;
    202 }
    203 
    204 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    205     if (cachedHasSSE2()) {
    206         if (isOpaque) {
    207             return SkBlitLCD16OpaqueRow_SSE2;
    208         } else {
    209             return SkBlitLCD16Row_SSE2;
    210         }
    211     } else {
    212         return NULL;
    213     }
    214 
    215 }
    216 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
    217                                                  SkMask::Format maskFormat,
    218                                                  RowFlags flags) {
    219     return NULL;
    220 }
    221 
    222 SkMemset16Proc SkMemset16GetPlatformProc() {
    223     if (cachedHasSSE2()) {
    224         return sk_memset16_SSE2;
    225     } else {
    226         return NULL;
    227     }
    228 }
    229 
    230 SkMemset32Proc SkMemset32GetPlatformProc() {
    231     if (cachedHasSSE2()) {
    232         return sk_memset32_SSE2;
    233     } else {
    234         return NULL;
    235     }
    236 }
    237 
    238 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning
    239 
    240 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() {
    241     if (cachedHasSSE2()) {
    242         return ColorRect32_SSE2;
    243     } else {
    244         return NULL;
    245     }
    246 }
    247