Home | History | Annotate | Download | only in opts
      1 /*
      2  * Copyright 2014 The Android Open Source Project
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkMath_opts_SSE2_DEFINED
      9 #define SkMath_opts_SSE2_DEFINED
     10 
     11 #include <emmintrin.h>
     12 
     13 // Because no _mm_div_epi32() in SSE2, we use float division to emulate.
     14 // When using this function, make sure a and b don't exceed float's precision.
     15 static inline __m128i shim_mm_div_epi32(const __m128i& a, const __m128i& b) {
     16     __m128 x = _mm_cvtepi32_ps(a);
     17     __m128 y = _mm_cvtepi32_ps(b);
     18     return _mm_cvttps_epi32(_mm_div_ps(x, y));
     19 }
     20 
     21 // Portable version of SkSqrtBits is in SkMath.cpp.
     22 static inline __m128i SkSqrtBits_SSE2(const __m128i& x, int count) {
     23     __m128i root =  _mm_setzero_si128();
     24     __m128i remHi = _mm_setzero_si128();
     25     __m128i remLo = x;
     26     __m128i one128 = _mm_set1_epi32(1);
     27 
     28     do {
     29         root = _mm_slli_epi32(root, 1);
     30 
     31         remHi = _mm_or_si128(_mm_slli_epi32(remHi, 2),
     32                              _mm_srli_epi32(remLo, 30));
     33         remLo = _mm_slli_epi32(remLo, 2);
     34 
     35         __m128i testDiv = _mm_slli_epi32(root, 1);
     36         testDiv = _mm_add_epi32(testDiv, _mm_set1_epi32(1));
     37 
     38         __m128i cmp = _mm_cmplt_epi32(remHi, testDiv);
     39         __m128i remHi1 = _mm_and_si128(cmp, remHi);
     40         __m128i root1 = _mm_and_si128(cmp, root);
     41         __m128i remHi2 = _mm_andnot_si128(cmp, _mm_sub_epi32(remHi, testDiv));
     42         __m128i root2 = _mm_andnot_si128(cmp, _mm_add_epi32(root, one128));
     43 
     44         remHi = _mm_or_si128(remHi1, remHi2);
     45         root = _mm_or_si128(root1, root2);
     46     } while (--count >= 0);
     47 
     48     return root;
     49 }
     50 
     51 #endif // SkMath_opts_SSE2_DEFINED
     52