1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #if defined(_MSC_VER) 6 #include <intrin.h> 7 #else 8 #include <mmintrin.h> 9 #endif 10 11 #include "build/build_config.h" 12 #include "media/base/simd/filter_yuv.h" 13 14 namespace media { 15 16 #if defined(COMPILER_MSVC) 17 // Warning 4799 is about calling emms before the function exits. 18 // We calls emms in a frame level so suppress this warning. 19 #pragma warning(push) 20 #pragma warning(disable: 4799) 21 #endif 22 23 void FilterYUVRows_MMX(uint8* dest, 24 const uint8* src0, 25 const uint8* src1, 26 int width, 27 int fraction) { 28 int pixel = 0; 29 30 // Process the unaligned bytes first. 31 int unaligned_width = 32 (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7; 33 while (pixel < width && pixel < unaligned_width) { 34 dest[pixel] = (src0[pixel] * (256 - fraction) + 35 src1[pixel] * fraction) >> 8; 36 ++pixel; 37 } 38 39 __m64 zero = _mm_setzero_si64(); 40 __m64 src1_fraction = _mm_set1_pi16(fraction); 41 __m64 src0_fraction = _mm_set1_pi16(256 - fraction); 42 const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel); 43 const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel); 44 __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel); 45 __m64* end64 = reinterpret_cast<__m64*>( 46 reinterpret_cast<uintptr_t>(dest + width) & ~7); 47 48 while (dest64 < end64) { 49 __m64 src0 = *src0_64++; 50 __m64 src1 = *src1_64++; 51 __m64 src2 = _mm_unpackhi_pi8(src0, zero); 52 __m64 src3 = _mm_unpackhi_pi8(src1, zero); 53 src0 = _mm_unpacklo_pi8(src0, zero); 54 src1 = _mm_unpacklo_pi8(src1, zero); 55 src0 = _mm_mullo_pi16(src0, src0_fraction); 56 src1 = _mm_mullo_pi16(src1, src1_fraction); 57 src2 = _mm_mullo_pi16(src2, src0_fraction); 58 src3 = _mm_mullo_pi16(src3, src1_fraction); 59 src0 = _mm_add_pi16(src0, src1); 60 src2 = _mm_add_pi16(src2, src3); 61 src0 = _mm_srli_pi16(src0, 8); 62 src2 = _mm_srli_pi16(src2, 8); 63 src0 = _mm_packs_pu16(src0, src2); 64 *dest64++ = src0; 65 pixel += 8; 66 } 67 68 while (pixel < width) { 69 dest[pixel] = (src0[pixel] * (256 - fraction) + 70 src1[pixel] * fraction) >> 8; 71 ++pixel; 72 } 73 } 74 75 #if defined(COMPILER_MSVC) 76 #pragma warning(pop) 77 #endif 78 79 } // namespace media 80