Home | History | Annotate | Download | only in simd
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #if defined(_MSC_VER)
      6 #include <intrin.h>
      7 #else
      8 #include <mmintrin.h>
      9 #endif
     10 
     11 #include "build/build_config.h"
     12 #include "media/base/simd/filter_yuv.h"
     13 
     14 namespace media {
     15 
     16 #if defined(COMPILER_MSVC)
     17 // Warning 4799 is about calling emms before the function exits.
     18 // We call emms at the frame level, so suppress this warning.
     19 #pragma warning(push)
     20 #pragma warning(disable: 4799)
     21 #endif
     22 
     23 void FilterYUVRows_MMX(uint8* dest,
     24                        const uint8* src0,
     25                        const uint8* src1,
     26                        int width,
     27                        int fraction) {
     28   int pixel = 0;
     29 
     30   // Process the unaligned bytes first.
     31   int unaligned_width =
     32       (8 - (reinterpret_cast<uintptr_t>(dest) & 7)) & 7;
     33   while (pixel < width && pixel < unaligned_width) {
     34     dest[pixel] = (src0[pixel] * (256 - fraction) +
     35                    src1[pixel] * fraction) >> 8;
     36     ++pixel;
     37   }
     38 
     39   __m64 zero = _mm_setzero_si64();
     40   __m64 src1_fraction = _mm_set1_pi16(fraction);
     41   __m64 src0_fraction = _mm_set1_pi16(256 - fraction);
     42   const __m64* src0_64 = reinterpret_cast<const __m64*>(src0 + pixel);
     43   const __m64* src1_64 = reinterpret_cast<const __m64*>(src1 + pixel);
     44   __m64* dest64 = reinterpret_cast<__m64*>(dest + pixel);
     45   __m64* end64 = reinterpret_cast<__m64*>(
     46       reinterpret_cast<uintptr_t>(dest + width) & ~7);
     47 
     48   while (dest64 < end64) {
     49     __m64 src0 = *src0_64++;
     50     __m64 src1 = *src1_64++;
     51     __m64 src2 = _mm_unpackhi_pi8(src0, zero);
     52     __m64 src3 = _mm_unpackhi_pi8(src1, zero);
     53     src0 = _mm_unpacklo_pi8(src0, zero);
     54     src1 = _mm_unpacklo_pi8(src1, zero);
     55     src0 = _mm_mullo_pi16(src0, src0_fraction);
     56     src1 = _mm_mullo_pi16(src1, src1_fraction);
     57     src2 = _mm_mullo_pi16(src2, src0_fraction);
     58     src3 = _mm_mullo_pi16(src3, src1_fraction);
     59     src0 = _mm_add_pi16(src0, src1);
     60     src2 = _mm_add_pi16(src2, src3);
     61     src0 = _mm_srli_pi16(src0, 8);
     62     src2 = _mm_srli_pi16(src2, 8);
     63     src0 = _mm_packs_pu16(src0, src2);
     64     *dest64++ = src0;
     65     pixel += 8;
     66   }
     67 
     68   while (pixel < width) {
     69     dest[pixel] = (src0[pixel] * (256 - fraction) +
     70                    src1[pixel] * fraction) >> 8;
     71     ++pixel;
     72   }
     73 }
     74 
     75 #if defined(COMPILER_MSVC)
     76 #pragma warning(pop)
     77 #endif
     78 
     79 }  // namespace media
     80