1 /* 2 * Copyright 2009 The Android Open Source Project 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #include "SkBitmapProcState_opts_SSE2.h" 9 #include "SkBitmapProcState_opts_SSSE3.h" 10 #include "SkBitmapFilter_opts_SSE2.h" 11 #include "SkBlitMask.h" 12 #include "SkBlitRow.h" 13 #include "SkBlitRect_opts_SSE2.h" 14 #include "SkBlitRow_opts_SSE2.h" 15 #include "SkBlurImage_opts_SSE2.h" 16 #include "SkUtils_opts_SSE2.h" 17 #include "SkUtils.h" 18 #include "SkMorphology_opts.h" 19 #include "SkMorphology_opts_SSE2.h" 20 21 #include "SkRTConf.h" 22 23 #if defined(_MSC_VER) && defined(_WIN64) 24 #include <intrin.h> 25 #endif 26 27 /* This file must *not* be compiled with -msse or -msse2, otherwise 28 gcc may generate sse2 even for scalar ops (and thus give an invalid 29 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp 30 in this directory should be compiled with -msse2. */ 31 32 33 #ifdef _MSC_VER 34 static inline void getcpuid(int info_type, int info[4]) { 35 #if defined(_WIN64) 36 __cpuid(info, info_type); 37 #else 38 __asm { 39 mov eax, [info_type] 40 cpuid 41 mov edi, [info] 42 mov [edi], eax 43 mov [edi+4], ebx 44 mov [edi+8], ecx 45 mov [edi+12], edx 46 } 47 #endif 48 } 49 #else 50 #if defined(__x86_64__) 51 static inline void getcpuid(int info_type, int info[4]) { 52 asm volatile ( 53 "cpuid \n\t" 54 : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) 55 : "a"(info_type) 56 ); 57 } 58 #else 59 static inline void getcpuid(int info_type, int info[4]) { 60 // We save and restore ebx, so this code can be compatible with -fPIC 61 asm volatile ( 62 "pushl %%ebx \n\t" 63 "cpuid \n\t" 64 "movl %%ebx, %1 \n\t" 65 "popl %%ebx \n\t" 66 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) 67 : "a"(info_type) 68 ); 69 } 70 #endif 71 #endif 72 73 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 74 /* All x86_64 machines have SSE2, or we know it's supported at compile time, so don't even bother checking. */ 75 static inline bool hasSSE2() { 76 return true; 77 } 78 #else 79 80 static inline bool hasSSE2() { 81 int cpu_info[4] = { 0 }; 82 getcpuid(1, cpu_info); 83 return (cpu_info[3] & (1<<26)) != 0; 84 } 85 #endif 86 87 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 88 /* If we know SSSE3 is supported at compile time, don't even bother checking. */ 89 static inline bool hasSSSE3() { 90 return true; 91 } 92 #else 93 94 static inline bool hasSSSE3() { 95 int cpu_info[4] = { 0 }; 96 getcpuid(1, cpu_info); 97 return (cpu_info[2] & 0x200) != 0; 98 } 99 #endif 100 101 static bool cachedHasSSE2() { 102 static bool gHasSSE2 = hasSSE2(); 103 return gHasSSE2; 104 } 105 106 static bool cachedHasSSSE3() { 107 static bool gHasSSSE3 = hasSSSE3(); 108 return gHasSSSE3; 109 } 110 111 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "Use SSE optimized version of high quality image filters"); 112 113 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { 114 if (cachedHasSSE2()) { 115 procs->fExtraHorizontalReads = 3; 116 procs->fConvolveVertically = &convolveVertically_SSE2; 117 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; 118 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; 119 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; 120 } 121 } 122 123 void SkBitmapProcState::platformProcs() { 124 if (cachedHasSSSE3()) { 125 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 126 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; 127 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 128 fSampleProc32 = S32_alpha_D32_filter_DX_SSSE3; 129 } 130 131 if (fSampleProc32 == S32_opaque_D32_filter_DXDY) { 132 fSampleProc32 = S32_opaque_D32_filter_DXDY_SSSE3; 133 } else if (fSampleProc32 == S32_alpha_D32_filter_DXDY) { 134 fSampleProc32 = S32_alpha_D32_filter_DXDY_SSSE3; 135 } 136 } else if (cachedHasSSE2()) { 137 if (fSampleProc32 == S32_opaque_D32_filter_DX) { 138 fSampleProc32 = S32_opaque_D32_filter_DX_SSE2; 139 } else if (fSampleProc32 == S32_alpha_D32_filter_DX) { 140 fSampleProc32 = S32_alpha_D32_filter_DX_SSE2; 141 } 142 143 if (fSampleProc16 == S32_D16_filter_DX) { 144 fSampleProc16 = S32_D16_filter_DX_SSE2; 145 } 146 } 147 148 if (cachedHasSSSE3() || cachedHasSSE2()) { 149 if (fMatrixProc == ClampX_ClampY_filter_scale) { 150 fMatrixProc = ClampX_ClampY_filter_scale_SSE2; 151 } else if (fMatrixProc == ClampX_ClampY_nofilter_scale) { 152 fMatrixProc = ClampX_ClampY_nofilter_scale_SSE2; 153 } 154 155 if (fMatrixProc == ClampX_ClampY_filter_affine) { 156 fMatrixProc = ClampX_ClampY_filter_affine_SSE2; 157 } else if (fMatrixProc == ClampX_ClampY_nofilter_affine) { 158 fMatrixProc = ClampX_ClampY_nofilter_affine_SSE2; 159 } 160 if (c_hqfilter_sse) { 161 if (fShaderProc32 == highQualityFilter32) { 162 fShaderProc32 = highQualityFilter_SSE2; 163 } 164 } 165 } 166 } 167 168 static SkBlitRow::Proc32 platform_32_procs[] = { 169 NULL, // S32_Opaque, 170 S32_Blend_BlitRow32_SSE2, // S32_Blend, 171 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque 172 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, 173 }; 174 175 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { 176 return NULL; 177 } 178 179 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { 180 if (cachedHasSSE2()) { 181 return Color32_SSE2; 182 } else { 183 return NULL; 184 } 185 } 186 187 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { 188 if (cachedHasSSE2()) { 189 return platform_32_procs[flags]; 190 } else { 191 return NULL; 192 } 193 } 194 195 196 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, 197 SkMask::Format maskFormat, 198 SkColor color) { 199 if (SkMask::kA8_Format != maskFormat) { 200 return NULL; 201 } 202 203 ColorProc proc = NULL; 204 if (cachedHasSSE2()) { 205 switch (dstConfig) { 206 case SkBitmap::kARGB_8888_Config: 207 // The SSE2 version is not (yet) faster for black, so we check 208 // for that. 209 if (SK_ColorBLACK != color) { 210 proc = SkARGB32_A8_BlitMask_SSE2; 211 } 212 break; 213 default: 214 break; 215 } 216 } 217 return proc; 218 } 219 220 SkBlitMask::BlitLCD16RowProc SkBlitMask::PlatformBlitRowProcs16(bool isOpaque) { 221 if (cachedHasSSE2()) { 222 if (isOpaque) { 223 return SkBlitLCD16OpaqueRow_SSE2; 224 } else { 225 return SkBlitLCD16Row_SSE2; 226 } 227 } else { 228 return NULL; 229 } 230 231 } 232 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, 233 SkMask::Format maskFormat, 234 RowFlags flags) { 235 return NULL; 236 } 237 238 SkMemset16Proc SkMemset16GetPlatformProc() { 239 if (cachedHasSSE2()) { 240 return sk_memset16_SSE2; 241 } else { 242 return NULL; 243 } 244 } 245 246 SkMemset32Proc SkMemset32GetPlatformProc() { 247 if (cachedHasSSE2()) { 248 return sk_memset32_SSE2; 249 } else { 250 return NULL; 251 } 252 } 253 254 SkMorphologyProc SkMorphologyGetPlatformProc(SkMorphologyProcType type) { 255 if (!cachedHasSSE2()) { 256 return NULL; 257 } 258 switch (type) { 259 case kDilateX_SkMorphologyProcType: 260 return SkDilateX_SSE2; 261 case kDilateY_SkMorphologyProcType: 262 return SkDilateY_SSE2; 263 case kErodeX_SkMorphologyProcType: 264 return SkErodeX_SSE2; 265 case kErodeY_SkMorphologyProcType: 266 return SkErodeY_SSE2; 267 default: 268 return NULL; 269 } 270 } 271 272 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, 273 SkBoxBlurProc* boxBlurY, 274 SkBoxBlurProc* boxBlurXY, 275 SkBoxBlurProc* boxBlurYX) { 276 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION 277 return false; 278 #else 279 if (!cachedHasSSE2()) { 280 return false; 281 } 282 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlurYX); 283 #endif 284 } 285 286 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning 287 288 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { 289 if (cachedHasSSE2()) { 290 return ColorRect32_SSE2; 291 } else { 292 return NULL; 293 } 294 } 295