Home | History | Annotate | Download | only in signal_processing
      1 /*
      2  *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <arm_neon.h>
     12 #include <assert.h>
     13 #include <stdlib.h>
     14 
     15 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
     16 
     17 // Maximum absolute value of word16 vector. C version for generic platforms.
     18 int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length) {
     19   int absolute = 0, maximum = 0;
     20 
     21   assert(length > 0);
     22 
     23   const int16_t* p_start = vector;
     24   size_t rest = length & 7;
     25   const int16_t* p_end = vector + length - rest;
     26 
     27   int16x8_t v;
     28   uint16x8_t max_qv;
     29   max_qv = vdupq_n_u16(0);
     30 
     31   while (p_start < p_end) {
     32     v = vld1q_s16(p_start);
     33     // Note vabs doesn't change the value of -32768.
     34     v = vabsq_s16(v);
     35     // Use u16 so we don't lose the value -32768.
     36     max_qv = vmaxq_u16(max_qv, vreinterpretq_u16_s16(v));
     37     p_start += 8;
     38   }
     39 
     40 #ifdef WEBRTC_ARCH_ARM64
     41   maximum = (int)vmaxvq_u16(max_qv);
     42 #else
     43   uint16x4_t max_dv;
     44   max_dv = vmax_u16(vget_low_u16(max_qv), vget_high_u16(max_qv));
     45   max_dv = vpmax_u16(max_dv, max_dv);
     46   max_dv = vpmax_u16(max_dv, max_dv);
     47 
     48   maximum = (int)vget_lane_u16(max_dv, 0);
     49 #endif
     50 
     51   p_end = vector + length;
     52   while (p_start < p_end) {
     53     absolute = abs((int)(*p_start));
     54 
     55     if (absolute > maximum) {
     56       maximum = absolute;
     57     }
     58     p_start++;
     59   }
     60 
     61   // Guard the case for abs(-32768).
     62   if (maximum > WEBRTC_SPL_WORD16_MAX) {
     63     maximum = WEBRTC_SPL_WORD16_MAX;
     64   }
     65 
     66   return (int16_t)maximum;
     67 }
     68 
     69 // Maximum absolute value of word32 vector. NEON intrinsics version for
     70 // ARM 32-bit/64-bit platforms.
     71 int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length) {
     72   // Use uint32_t for the local variables, to accommodate the return value
     73   // of abs(0x80000000), which is 0x80000000.
     74 
     75   uint32_t absolute = 0, maximum = 0;
     76   size_t i = 0;
     77   size_t residual = length & 0x7;
     78 
     79   assert(length > 0);
     80 
     81   const int32_t* p_start = vector;
     82   uint32x4_t max32x4_0 = vdupq_n_u32(0);
     83   uint32x4_t max32x4_1 = vdupq_n_u32(0);
     84 
     85   // First part, unroll the loop 8 times.
     86   for (i = 0; i < length - residual; i += 8) {
     87     int32x4_t in32x4_0 = vld1q_s32(p_start);
     88     p_start += 4;
     89     int32x4_t in32x4_1 = vld1q_s32(p_start);
     90     p_start += 4;
     91     in32x4_0 = vabsq_s32(in32x4_0);
     92     in32x4_1 = vabsq_s32(in32x4_1);
     93     // vabs doesn't change the value of 0x80000000.
     94     // Use u32 so we don't lose the value 0x80000000.
     95     max32x4_0 = vmaxq_u32(max32x4_0, vreinterpretq_u32_s32(in32x4_0));
     96     max32x4_1 = vmaxq_u32(max32x4_1, vreinterpretq_u32_s32(in32x4_1));
     97   }
     98 
     99   uint32x4_t max32x4 = vmaxq_u32(max32x4_0, max32x4_1);
    100 #if defined(WEBRTC_ARCH_ARM64)
    101   maximum = vmaxvq_u32(max32x4);
    102 #else
    103   uint32x2_t max32x2 = vmax_u32(vget_low_u32(max32x4), vget_high_u32(max32x4));
    104   max32x2 = vpmax_u32(max32x2, max32x2);
    105 
    106   maximum = vget_lane_u32(max32x2, 0);
    107 #endif
    108 
    109   // Second part, do the remaining iterations (if any).
    110   for (i = residual; i > 0; i--) {
    111     absolute = abs((int)(*p_start));
    112     if (absolute > maximum) {
    113       maximum = absolute;
    114     }
    115     p_start++;
    116   }
    117 
    118   // Guard against the case for 0x80000000.
    119   maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
    120 
    121   return (int32_t)maximum;
    122 }
    123 
    124 // Maximum value of word16 vector. NEON intrinsics version for
    125 // ARM 32-bit/64-bit platforms.
    126 int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length) {
    127   int16_t maximum = WEBRTC_SPL_WORD16_MIN;
    128   size_t i = 0;
    129   size_t residual = length & 0x7;
    130 
    131   assert(length > 0);
    132 
    133   const int16_t* p_start = vector;
    134   int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
    135 
    136   // First part, unroll the loop 8 times.
    137   for (i = 0; i < length - residual; i += 8) {
    138     int16x8_t in16x8 = vld1q_s16(p_start);
    139     max16x8 = vmaxq_s16(max16x8, in16x8);
    140     p_start += 8;
    141   }
    142 
    143 #if defined(WEBRTC_ARCH_ARM64)
    144   maximum = vmaxvq_s16(max16x8);
    145 #else
    146   int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8));
    147   max16x4 = vpmax_s16(max16x4, max16x4);
    148   max16x4 = vpmax_s16(max16x4, max16x4);
    149 
    150   maximum = vget_lane_s16(max16x4, 0);
    151 #endif
    152 
    153   // Second part, do the remaining iterations (if any).
    154   for (i = residual; i > 0; i--) {
    155     if (*p_start > maximum)
    156       maximum = *p_start;
    157     p_start++;
    158   }
    159   return maximum;
    160 }
    161 
    162 // Maximum value of word32 vector. NEON intrinsics version for
    163 // ARM 32-bit/64-bit platforms.
    164 int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length) {
    165   int32_t maximum = WEBRTC_SPL_WORD32_MIN;
    166   size_t i = 0;
    167   size_t residual = length & 0x7;
    168 
    169   assert(length > 0);
    170 
    171   const int32_t* p_start = vector;
    172   int32x4_t max32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
    173   int32x4_t max32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MIN);
    174 
    175   // First part, unroll the loop 8 times.
    176   for (i = 0; i < length - residual; i += 8) {
    177     int32x4_t in32x4_0 = vld1q_s32(p_start);
    178     p_start += 4;
    179     int32x4_t in32x4_1 = vld1q_s32(p_start);
    180     p_start += 4;
    181     max32x4_0 = vmaxq_s32(max32x4_0, in32x4_0);
    182     max32x4_1 = vmaxq_s32(max32x4_1, in32x4_1);
    183   }
    184 
    185   int32x4_t max32x4 = vmaxq_s32(max32x4_0, max32x4_1);
    186 #if defined(WEBRTC_ARCH_ARM64)
    187   maximum = vmaxvq_s32(max32x4);
    188 #else
    189   int32x2_t max32x2 = vmax_s32(vget_low_s32(max32x4), vget_high_s32(max32x4));
    190   max32x2 = vpmax_s32(max32x2, max32x2);
    191 
    192   maximum = vget_lane_s32(max32x2, 0);
    193 #endif
    194 
    195   // Second part, do the remaining iterations (if any).
    196   for (i = residual; i > 0; i--) {
    197     if (*p_start > maximum)
    198       maximum = *p_start;
    199     p_start++;
    200   }
    201   return maximum;
    202 }
    203 
    204 // Minimum value of word16 vector. NEON intrinsics version for
    205 // ARM 32-bit/64-bit platforms.
    206 int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length) {
    207   int16_t minimum = WEBRTC_SPL_WORD16_MAX;
    208   size_t i = 0;
    209   size_t residual = length & 0x7;
    210 
    211   assert(length > 0);
    212 
    213   const int16_t* p_start = vector;
    214   int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
    215 
    216   // First part, unroll the loop 8 times.
    217   for (i = 0; i < length - residual; i += 8) {
    218     int16x8_t in16x8 = vld1q_s16(p_start);
    219     min16x8 = vminq_s16(min16x8, in16x8);
    220     p_start += 8;
    221   }
    222 
    223 #if defined(WEBRTC_ARCH_ARM64)
    224   minimum = vminvq_s16(min16x8);
    225 #else
    226   int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8));
    227   min16x4 = vpmin_s16(min16x4, min16x4);
    228   min16x4 = vpmin_s16(min16x4, min16x4);
    229 
    230   minimum = vget_lane_s16(min16x4, 0);
    231 #endif
    232 
    233   // Second part, do the remaining iterations (if any).
    234   for (i = residual; i > 0; i--) {
    235     if (*p_start < minimum)
    236       minimum = *p_start;
    237     p_start++;
    238   }
    239   return minimum;
    240 }
    241 
    242 // Minimum value of word32 vector. NEON intrinsics version for
    243 // ARM 32-bit/64-bit platforms.
    244 int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
    245   int32_t minimum = WEBRTC_SPL_WORD32_MAX;
    246   size_t i = 0;
    247   size_t residual = length & 0x7;
    248 
    249   assert(length > 0);
    250 
    251   const int32_t* p_start = vector;
    252   int32x4_t min32x4_0 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
    253   int32x4_t min32x4_1 = vdupq_n_s32(WEBRTC_SPL_WORD32_MAX);
    254 
    255   // First part, unroll the loop 8 times.
    256   for (i = 0; i < length - residual; i += 8) {
    257     int32x4_t in32x4_0 = vld1q_s32(p_start);
    258     p_start += 4;
    259     int32x4_t in32x4_1 = vld1q_s32(p_start);
    260     p_start += 4;
    261     min32x4_0 = vminq_s32(min32x4_0, in32x4_0);
    262     min32x4_1 = vminq_s32(min32x4_1, in32x4_1);
    263   }
    264 
    265   int32x4_t min32x4 = vminq_s32(min32x4_0, min32x4_1);
    266 #if defined(WEBRTC_ARCH_ARM64)
    267   minimum = vminvq_s32(min32x4);
    268 #else
    269   int32x2_t min32x2 = vmin_s32(vget_low_s32(min32x4), vget_high_s32(min32x4));
    270   min32x2 = vpmin_s32(min32x2, min32x2);
    271 
    272   minimum = vget_lane_s32(min32x2, 0);
    273 #endif
    274 
    275   // Second part, do the remaining iterations (if any).
    276   for (i = residual; i > 0; i--) {
    277     if (*p_start < minimum)
    278       minimum = *p_start;
    279     p_start++;
    280   }
    281   return minimum;
    282 }
    283 
    284