1 // Copyright 2014 Google Inc. All Rights Reserved. 2 // 3 // Use of this source code is governed by a BSD-style license 4 // that can be found in the COPYING file in the root of the source 5 // tree. An additional intellectual property rights grant can be found 6 // in the file PATENTS. All contributing project authors may 7 // be found in the AUTHORS file in the root of the source tree. 8 // ----------------------------------------------------------------------------- 9 // 10 // Utilities for processing transparent channel. 11 // 12 // Author: Skal (pascal.massimino (at) gmail.com) 13 14 #include "./dsp.h" 15 16 #if defined(WEBP_USE_SSE2) 17 #include <emmintrin.h> 18 19 //------------------------------------------------------------------------------ 20 21 static int ExtractAlpha(const uint8_t* argb, int argb_stride, 22 int width, int height, 23 uint8_t* alpha, int alpha_stride) { 24 // alpha_and stores an 'and' operation of all the alpha[] values. The final 25 // value is not 0xff if any of the alpha[] is not equal to 0xff. 26 uint32_t alpha_and = 0xff; 27 int i, j; 28 const __m128i a_mask = _mm_set1_epi32(0xffu); // to preserve alpha 29 const __m128i all_0xff = _mm_set_epi32(0, 0, ~0u, ~0u); 30 __m128i all_alphas = all_0xff; 31 32 // We must be able to access 3 extra bytes after the last written byte 33 // 'src[4 * width - 4]', because we don't know if alpha is the first or the 34 // last byte of the quadruplet. 35 const int limit = (width - 1) & ~7; 36 37 for (j = 0; j < height; ++j) { 38 const __m128i* src = (const __m128i*)argb; 39 for (i = 0; i < limit; i += 8) { 40 // load 32 argb bytes 41 const __m128i a0 = _mm_loadu_si128(src + 0); 42 const __m128i a1 = _mm_loadu_si128(src + 1); 43 const __m128i b0 = _mm_and_si128(a0, a_mask); 44 const __m128i b1 = _mm_and_si128(a1, a_mask); 45 const __m128i c0 = _mm_packs_epi32(b0, b1); 46 const __m128i d0 = _mm_packus_epi16(c0, c0); 47 // store 48 _mm_storel_epi64((__m128i*)&alpha[i], d0); 49 // accumulate eight alpha 'and' in parallel 50 all_alphas = _mm_and_si128(all_alphas, d0); 51 src += 2; 52 } 53 for (; i < width; ++i) { 54 const uint32_t alpha_value = argb[4 * i]; 55 alpha[i] = alpha_value; 56 alpha_and &= alpha_value; 57 } 58 argb += argb_stride; 59 alpha += alpha_stride; 60 } 61 // Combine the eight alpha 'and' into a 8-bit mask. 62 alpha_and &= _mm_movemask_epi8(_mm_cmpeq_epi8(all_alphas, all_0xff)); 63 return (alpha_and == 0xff); 64 } 65 66 #endif // WEBP_USE_SSE2 67 68 //------------------------------------------------------------------------------ 69 // Init function 70 71 extern void WebPInitAlphaProcessingSSE2(void); 72 73 void WebPInitAlphaProcessingSSE2(void) { 74 #if defined(WEBP_USE_SSE2) 75 WebPExtractAlpha = ExtractAlpha; 76 #endif 77 } 78