Home | History | Annotate | Download | only in simd
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "media/base/sinc_resampler.h"
      6 
      7 #include <xmmintrin.h>
      8 
      9 namespace media {
     10 
     11 float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
     12                                   const float* k2,
     13                                   double kernel_interpolation_factor) {
     14   __m128 m_input;
     15   __m128 m_sums1 = _mm_setzero_ps();
     16   __m128 m_sums2 = _mm_setzero_ps();
     17 
     18   // Based on |input_ptr| alignment, we need to use loadu or load.  Unrolling
     19   // these loops hurt performance in local testing.
     20   if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
     21     for (int i = 0; i < kKernelSize; i += 4) {
     22       m_input = _mm_loadu_ps(input_ptr + i);
     23       m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
     24       m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
     25     }
     26   } else {
     27     for (int i = 0; i < kKernelSize; i += 4) {
     28       m_input = _mm_load_ps(input_ptr + i);
     29       m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
     30       m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
     31     }
     32   }
     33 
     34   // Linearly interpolate the two "convolutions".
     35   m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(1.0 - kernel_interpolation_factor));
     36   m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(kernel_interpolation_factor));
     37   m_sums1 = _mm_add_ps(m_sums1, m_sums2);
     38 
     39   // Sum components together.
     40   float result;
     41   m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
     42   _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(
     43       m_sums2, m_sums2, 1)));
     44 
     45   return result;
     46 }
     47 
     48 }  // namespace media
     49