Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2009 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
#include "SkBitmapProcState_opts_SSE2.h"
#include "SkBitmapProcState_opts_SSSE3.h"
#include "SkBlitMask.h"
#include "SkBlitRow_opts_SSE2.h"
#include "SkUtils_opts_SSE2.h"
#include "SkUtils.h"

#if defined(_MSC_VER) && defined(_WIN64)
#include <intrin.h>  // __cpuid
#endif
     14 
/* This file must *not* be compiled with -msse or -msse2; otherwise gcc may
   emit SSE2 instructions even for scalar operations, which would raise an
   invalid-instruction fault on a Pentium3 when the code below runs.  Only
   files named *_SSE2.cpp in this directory should be compiled with -msse2. */
     19 
     20 
     21 #ifdef _MSC_VER
     22 static inline void getcpuid(int info_type, int info[4]) {
     23     __asm {
     24         mov    eax, [info_type]
     25         cpuid
     26         mov    edi, [info]
     27         mov    [edi], eax
     28         mov    [edi+4], ebx
     29         mov    [edi+8], ecx
     30         mov    [edi+12], edx
     31     }
     32 }
     33 #else
     34 #if defined(__x86_64__)
     35 static inline void getcpuid(int info_type, int info[4]) {
     36     asm volatile (
     37         "cpuid \n\t"
     38         : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3])
     39         : "a"(info_type)
     40     );
     41 }
     42 #else
     43 static inline void getcpuid(int info_type, int info[4]) {
     44     // We save and restore ebx, so this code can be compatible with -fPIC
     45     asm volatile (
     46         "pushl %%ebx      \n\t"
     47         "cpuid            \n\t"
     48         "movl %%ebx, %1   \n\t"
     49         "popl %%ebx       \n\t"
     50         : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3])
     51         : "a"(info_type)
     52     );
     53 }
     54 #endif
     55 #endif
     56 
     57 #if defined(__x86_64__) || defined(_WIN64)
     58 /* All x86_64 machines have SSE2, so don't even bother checking. */
     59 static inline bool hasSSE2() {
     60     return true;
     61 }
     62 #else
     63 
     64 static inline bool hasSSE2() {
     65     int cpu_info[4] = { 0 };
     66     getcpuid(1, cpu_info);
     67     return (cpu_info[3] & (1<<26)) != 0;
     68 }
     69 #endif
     70 
     71 static inline bool hasSSSE3() {
     72     int cpu_info[4] = { 0 };
     73     getcpuid(1, cpu_info);
     74     return (cpu_info[2] & 0x200) != 0;
     75 }
     76 
     77 static bool cachedHasSSE2() {
     78     static bool gHasSSE2 = hasSSE2();
     79     return gHasSSE2;
     80 }
     81 
     82 static bool cachedHasSSSE3() {
     83     static bool gHasSSSE3 = hasSSSE3();
     84     return gHasSSSE3;
     85 }
     86 
     87 void SkBitmapProcState::platformProcs() {
     88     if (cachedHasSSSE3()) {
     89         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
     90             fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3;
     91         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
     92             fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3;
     93         }
     94     } else if (cachedHasSSE2()) {
     95         if (fSampleProc32 == S32_opaque_D32_filter_DX) {
     96             fSampleProc32 = S32_opaque_D32_filter_DX_SSE2;
     97         } else if (fSampleProc32 == S32_alpha_D32_filter_DX) {
     98             fSampleProc32 = S32_alpha_D32_filter_DX_SSE2;
     99         }
    100     }
    101 
    102     if (cachedHasSSSE3() || cachedHasSSE2()) {
    103         if (fMatrixProc == ClampX_ClampY_filter_scale) {
    104             fMatrixProc = ClampX_ClampY_filter_scale_SSE2;
    105         } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) {
    106             fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2;
    107         }
    108 
    109         if (fMatrixProc == ClampX_ClampY_filter_affine) {
    110             fMatrixProc = ClampX_ClampY_filter_affine_SSE2;
    111         } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) {
    112             fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2;
    113         }
    114     }
    115 }
    116 
// SSE2 row-blit procs handed out by SkBlitRow::PlatformProcs32(), which
// indexes this table directly with its `flags` argument.  The order of the
// entries is therefore significant; the trailing comment on each line names
// the case that slot serves.  A NULL slot means no SSE2 proc is supplied for
// that case.
static SkBlitRow::Proc32 platform_32_procs[] = {
    NULL,                               // S32_Opaque,
    S32_Blend_BlitRow32_SSE2,           // S32_Blend,
    S32A_Opaque_BlitRow32_SSE2,         // S32A_Opaque
    S32A_Blend_BlitRow32_SSE2,          // S32A_Blend,
};
    123 
    124 SkBlitRow::Proc SkBlitRow::PlatformProcs4444(unsigned flags) {
    125     return NULL;
    126 }
    127 
    128 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) {
    129     return NULL;
    130 }
    131 
    132 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() {
    133     if (cachedHasSSE2()) {
    134         return Color32_SSE2;
    135     } else {
    136         return NULL;
    137     }
    138 }
    139 
    140 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) {
    141     if (cachedHasSSE2()) {
    142         return platform_32_procs[flags];
    143     } else {
    144         return NULL;
    145     }
    146 }
    147 
    148 
    149 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig,
    150                                                      SkMask::Format maskFormat,
    151                                                      SkColor color) {
    152     if (SkMask::kA8_Format != maskFormat) {
    153         return NULL;
    154     }
    155 
    156     ColorProc proc = NULL;
    157     if (cachedHasSSE2()) {
    158         switch (dstConfig) {
    159             case SkBitmap::kARGB_8888_Config:
    160                 // The SSE2 version is not (yet) faster for black, so we check
    161                 // for that.
    162                 if (SK_ColorBLACK != color) {
    163                     proc = SkARGB32_A8_BlitMask_SSE2;
    164                 }
    165                 break;
    166             default:
    167                 break;
    168         }
    169     }
    170     return proc;
    171 }
    172 
    173 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) {
    174     if (cachedHasSSE2()) {
    175         if (isOpaque) {
    176             return SkBlitLCD16OpaqueRow_SSE2;
    177         } else {
    178             return SkBlitLCD16Row_SSE2;
    179         }
    180     } else {
    181         return NULL;
    182     }
    183 
    184 }
    185 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig,
    186                                                  SkMask::Format maskFormat,
    187                                                  RowFlags flags) {
    188     return NULL;
    189 }
    190 
    191 SkMemset16Proc SkMemset16GetPlatformProc() {
    192     if (cachedHasSSE2()) {
    193         return sk_memset16_SSE2;
    194     } else {
    195         return NULL;
    196     }
    197 }
    198 
    199 SkMemset32Proc SkMemset32GetPlatformProc() {
    200     if (cachedHasSSE2()) {
    201         return sk_memset32_SSE2;
    202     } else {
    203         return NULL;
    204     }
    205 }
    206