1 /* 2 * Copyright 2009 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSSE3.h" 10 #include "SkBitmapFilter_opts_SSE2.h" 11 #include "SkBlitMask.h" 12 #include "SkBlitRow.h" 13 #include "SkBlitRect_opts_SSE2.h" 14 #include "SkBlitRow_opts_SSE2.h" 15 #include "SkUtils_opts_SSE2.h" 16 #include "SkUtils.h" 17 18 #include "SkRTConf.h" 19 20 #if defined(_MSC_VER) && defined(_WIN64) 21 #include <intrin.h> 22 #endif 23 24 /* This file must *not* be compiled with -msse or -msse2, otherwise 25 gcc may generate sse2 even for scalar ops (and thus give an invalid 26 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp 27 in this directory should be compiled with -msse2. */ 28 29 30 #ifdef _MSC_VER 31 static inline void getcpuid(int info_type, int info[4]) { 32 #if defined(_WIN64) 33 __cpuid(info, info_type); 34 #else 35 __asm { 36 mov eax, [info_type] 37 cpuid 38 mov edi, [info] 39 mov [edi], eax 40 mov [edi+4], ebx 41 mov [edi+8], ecx 42 mov [edi+12], edx 43 } 44 #endif 45 } 46 #else 47 #if defined(__x86_64__) 48 static inline void getcpuid(int info_type, int info[4]) { 49 asm volatile ( 50 "cpuid \n\t" 51 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 52 : "a"(info_type) 53 ); 54 } 55 #else 56 static inline void getcpuid(int info_type, int info[4]) { 57 // We save and restore ebx, so this code can be compatible with -fPIC 58 asm volatile ( 59 "pushl %%ebx \n\t" 60 "cpuid \n\t" 61 "movl %%ebx, %1 \n\t" 62 "popl %%ebx \n\t" 63 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 64 : "a"(info_type) 65 ); 66 } 67 #endif 68 #endif 69 70 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 71 /* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */ 72 static inline bool hasSSE2() { 73 return true; 74 } 75 #else 76 77 static inline bool hasSSE2() { 78 int cpu_info[4] = { 0 }; 79 getcpuid(1, cpu_info); 80 return (cpu_info[3] & (1<<26)) != 0; 81 } 82 #endif 83 84 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 85 /* If we know SSSE3 is supported at compile time, don't even bother checking. */ 86 static inline bool hasSSSE3() { 87 return true; 88 } 89 #else 90 91 static inline bool hasSSSE3() { 92 int cpu_info[4] = { 0 }; 93 getcpuid(1, cpu_info); 94 return (cpu_info[2] & 0x200) != 0; 95 } 96 #endif 97 98 static bool cachedHasSSE2() { 99 static bool gHasSSE2 = hasSSE2(); 100 return gHasSSE2; 101 } 102 103 static bool cachedHasSSSE3() { 104 static bool gHasSSSE3 = hasSSSE3(); 105 return gHasSSSE3; 106 } 107 108 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); 109 110 void SkBitmapProcState::platformConvolutionProcs() { 111 if (cachedHasSSE2()) { 112 fConvolutionProcs->fExtraHorizontalReads = 3; 113 fConvolutionProcs->fConvolveVertically = &convolveVertically_SSE2; 114 fConvolutionProcs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; 115 fConvolutionProcs->fConvolveHorizontally = &convolveHorizontally_SSE2; 116 fConvolutionProcs->fApplySIMDPadding = &applySIMDPadding_SSE2; 117 } 118 } 119 120 void SkBitmapProcState::platformProcs() { 121 if (cachedHasSSSE3()) { 122 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 123 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 124 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 125 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 126 } 127 128 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 129 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; 130 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { 131 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; 132 } 133 } else if (cachedHasSSE2()) { 134 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 135 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 136 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 137 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 138 } 139 140 if (fSampleProc16 == S32_D16_filter_DX) { 141 fSampleProc16 = S32_D16_filter_DX_SSE2; 142 } 143 } 144 145 if (cachedHasSSSE3() || cachedHasSSE2()) { 146 if (fMatrixProc == ClampX_ClampY_filter_scale) { 147 fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 148 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 149 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 150 } 151 152 if (fMatrixProc == ClampX_ClampY_filter_affine) { 153 fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 154 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 155 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 156 } 157 if (c_hqfilter_sse) { 158 if (fShaderProc32 == highQualityFilter) { 159 fShaderProc32 = highQualityFilter_SSE2; 160 } 161 } 162 } 163 } 164 165 static SkBlitRow::Proc32 platform_32_procs[] = { 166 NULL, // S32_Opaque, 167 S32_Blend_BlitRow32_SSE2, // S32_Blend, 168 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 169 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 170 }; 171 172 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 173 return NULL; 174 } 175 176 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 177 if (cachedHasSSE2()) { 178 return Color32_SSE2; 179 } else { 180 return NULL; 181 } 182 } 183 184 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 185 if (cachedHasSSE2()) { 186 return platform_32_procs[flags]; 187 } else { 188 return NULL; 189 } 190 } 191 192 193 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, 194 SkMask::Format maskFormat, 195 SkColor color) { 196 if (SkMask::kA8_Format != maskFormat) { 197 return NULL; 198 } 199 200 ColorProc proc = NULL; 201 if (cachedHasSSE2()) { 202 switch (dstConfig) { 203 case SkBitmap::kARGB_8888_Config: 204 // The SSE2 version is not (yet) faster for black, so we check 205 // for that. 206 if (SK_ColorBLACK != color) { 207 proc = SkARGB32_A8_BlitMask_SSE2; 208 } 209 break; 210 default: 211 break; 212 } 213 } 214 return proc; 215 } 216 217 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 218 if (cachedHasSSE2()) { 219 if (isOpaque) { 220 return SkBlitLCD16OpaqueRow_SSE2; 221 } else { 222 return SkBlitLCD16Row_SSE2; 223 } 224 } else { 225 return NULL; 226 } 227 228 } 229 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, 230 SkMask::Format maskFormat, 231 RowFlags flags) { 232 return NULL; 233 } 234 235 SkMemset16Proc SkMemset16GetPlatformProc() { 236 if (cachedHasSSE2()) { 237 return sk_memset16_SSE2; 238 } else { 239 return NULL; 240 } 241 } 242 243 SkMemset32Proc SkMemset32GetPlatformProc() { 244 if (cachedHasSSE2()) { 245 return sk_memset32_SSE2; 246 } else { 247 return NULL; 248 } 249 } 250 251 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning 252 253 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 254 if (cachedHasSSE2()) { 255 return ColorRect32_SSE2; 256 } else { 257 return NULL; 258 } 259 } 260