@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ This file contains minimum and maximum functions optimized for the
@ ARM Neon platform. Their descriptions can be found in
@ signal_processing_library.h.
@
@ The reference C code is in the file min_max_operations.c. The code here is
@ essentially that reference with the main loop unrolled by 8 using Neon
@ instructions, and is intended to be bit-exact with it.

     #include "webrtc/system_wrappers/interface/asm_defines.h"

     @ Symbols handed to GLOBAL_FUNCTION, grouped by element width. The macro is
     @ not defined in this file (presumably it comes from asm_defines.h and makes
     @ each symbol visible to the linker).
     GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
     GLOBAL_FUNCTION WebRtcSpl_MaxValueW16Neon
     GLOBAL_FUNCTION WebRtcSpl_MinValueW16Neon
     GLOBAL_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
     GLOBAL_FUNCTION WebRtcSpl_MaxValueW32Neon
     GLOBAL_FUNCTION WebRtcSpl_MinValueW32Neon

     @ Alignment for the function code that follows.
     .align  2
     @ int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, int length);
     @
     @ Returns the largest absolute value among vector[0 .. length-1], capped at
     @ 32767 (an input of -32768 therefore yields 32767). Returns -1 when vector
     @ is NULL or length <= 0.
     @
     @ In:      r0 = vector, r1 = length
     @ Out:     r0 = result
     @ Scratch: r2 (running maximum), r3, r12, q12, q13, flags
     DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW16Neon
       mvn r2, #0                  @ r2 = -1: returned as-is for bad input.
       cmp r0, #0
       beq MAX_ABS_W16_DONE        @ NULL vector.
       cmp r1, #1
       blt MAX_ABS_W16_DONE        @ length <= 0.

       cmp r1, #8
       blt MAX_ABS_W16_TAIL        @ Fewer than 8 elements: scalar loop only.

       vmov.i16 q12, #0            @ q12 = per-lane running maximum.
       sub r1, r1, #8              @ Bias the count so subs/bge below stops
                                   @ once fewer than 8 elements remain.

     MAX_ABS_W16_VECTOR:
       vld1.16 {q13}, [r0]!        @ Fetch 8 elements and advance the pointer.
       vabs.s16 q13, q13           @ -32768 is left as 0x8000 by vabs ...
       vmax.u16 q12, q12, q13      @ ... so compare unsigned to keep it largest.
       subs r1, r1, #8
       bge MAX_ABS_W16_VECTOR

       @ Collapse the 8 lanes of q12 into a single maximum in d24[0].
       vmax.u16 d24, d24, d25
       vpmax.u16 d24, d24, d24
       vpmax.u16 d24, d24, d24
       vmov.u16 r2, d24[0]         @ Zero-extended, so r2 is in 0..32768.
       adds r1, r1, #8             @ Undo the bias: r1 = elements left (0..7).
       beq MAX_ABS_W16_DONE

     MAX_ABS_W16_TAIL:
       ldrsh r3, [r0], #2          @ Next element, sign-extended.
       movs r12, r3
       rsbmi r12, r12, #0          @ r12 = |r3| (32768 for -32768).
       cmp r12, r2
       movgt r2, r12               @ Keep the larger magnitude.
       subs r1, r1, #1
       bne MAX_ABS_W16_TAIL

     MAX_ABS_W16_DONE:
       cmp r2, #0x8000             @ Only |-32768| can reach 0x8000 ...
       subeq r2, r2, #1            @ ... clamp it to 32767.
       mov r0, r2
       bx  lr

     @ int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, int length);
     @
     @ Returns the largest absolute value among vector[0 .. length-1], capped at
     @ 0x7FFFFFFF (an input of 0x80000000 therefore yields 0x7FFFFFFF). Returns
     @ -1 when vector is NULL or length <= 0.
     @
     @ In:      r0 = vector, r1 = length
     @ Out:     r0 = result
     @ Scratch: r2 (running maximum, treated as unsigned), r3, r12, q11-q14, flags
     DEFINE_FUNCTION WebRtcSpl_MaxAbsValueW32Neon
       cmp r0, #0
       moveq r0, #-1
       beq EXIT                    @ Return -1 for a NULL pointer.
       cmp r1, #0                  @ length
       movle r0, #-1
       ble EXIT                    @ Return -1 if length <= 0.

       mov r2, #0                  @ Running maximum. It has to be set before the
                                   @ branch below: for length < 8 the scalar loop
                                   @ is the only code that runs and it reads r2.
       cmp r1, #8
       blt LOOP_MAX_ABS_VALUE_W32

       vmov.i32 q11, #0            @ Two accumulators of four lanes each.
       vmov.i32 q12, #0
       sub r1, #8                  @ Bias the count so subs/bge below stops
                                   @ once fewer than 8 elements remain.

     LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32:
       vld1.32 {q13, q14}, [r0]!   @ Fetch 8 elements and advance the pointer.
       subs r1, #8
       vabs.s32 q13, q13           @ vabs doesn't change the value of 0x80000000.
       vabs.s32 q14, q14
       vmax.u32 q11, q13           @ Use u32 so we don't lose the value 0x80000000.
       vmax.u32 q12, q14
       bge LOOP_UNROLLED_BY_8_MAX_ABS_VALUE_W32

       @ Find the maximum value in the Neon registers and move it to r2.
       vmax.u32 q12, q11
       vmax.u32 d24, d25
       vpmax.u32 d24, d24, d24
       adds r1, #8                 @ Undo the bias: r1 = elements left (0..7).
       vmov.u32 r2, d24[0]         @ vmov leaves the flags from adds intact.
       beq END_MAX_ABS_VALUE_W32

     LOOP_MAX_ABS_VALUE_W32:
       ldr r3, [r0], #4
       eor r12, r3, r3, asr #31    @ eor and then sub, to get absolute value.
       sub r12, r12, r3, asr #31
       cmp r2, r12
       movcc r2, r12               @ Unsigned compare so 0x80000000 ranks highest.
       subs r1, #1
       bne LOOP_MAX_ABS_VALUE_W32

     END_MAX_ABS_VALUE_W32:
       mvn r0, #0x80000000         @ r0 = 0x7FFFFFFF, the cap for 0x80000000.
       cmp r2, r0
       movcc r0, r2                @ Return r2 when it is below the cap.

     EXIT:
       bx  lr

    @ int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, int length);
    @
    @ Returns the largest element of vector[0 .. length-1], or -32768 when
    @ vector is NULL or length <= 0.
    @
    @ In:      r0 = vector, r1 = length
    @ Out:     r0 = result, sign-extended to 32 bits
    @ Scratch: r2 (running maximum, kept sign-extended), r3, q12, q13, flags
    DEFINE_FUNCTION WebRtcSpl_MaxValueW16Neon
      mov r2, #0x8000
      sxth r2, r2                 @ r2 = -32768 as a signed word. The scalar loop
                                  @ compares it with sign-extended elements, so a
                                  @ plain +0x8000 would never be replaced.
      cmp r0, #0
      beq END_MAX_VALUE_W16       @ NULL vector.
      cmp r1, #0
      ble END_MAX_VALUE_W16       @ length <= 0.

      cmp r1, #8
      blt LOOP_MAX_VALUE_W16      @ Fewer than 8 elements: scalar loop only.

      vmov.i16 q12, #0x8000       @ Every lane starts at -32768.
      sub r1, #8                  @ Bias the count so subs/bge below stops
                                  @ once fewer than 8 elements remain.

    LOOP_UNROLLED_BY_8_MAX_VALUE_W16:
      vld1.16 {q13}, [r0]!        @ Fetch 8 elements and advance the pointer.
      subs r1, #8
      vmax.s16 q12, q13
      bge LOOP_UNROLLED_BY_8_MAX_VALUE_W16

      @ Find the maximum value in the Neon registers and move it to r2.
      vmax.s16 d24, d25
      vpmax.s16 d24, d24, d24
      vpmax.s16 d24, d24, d24
      adds r1, #8                 @ Undo the bias: r1 = elements left (0..7).
      vmov.s16 r2, d24[0]         @ Sign-extend: a negative maximum must stay
                                  @ negative for the signed compares below.
      beq END_MAX_VALUE_W16

    LOOP_MAX_VALUE_W16:
      ldrsh r3, [r0], #2          @ Next element, sign-extended.
      cmp r2, r3
      movlt r2, r3
      subs r1, #1
      bne LOOP_MAX_VALUE_W16

    END_MAX_VALUE_W16:
      mov r0, r2
      bx  lr

    @ int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, int length);
    @
    @ Returns the largest element of vector[0 .. length-1], or 0x80000000 when
    @ vector is NULL or length <= 0.
    @
    @ In:      r0 = vector, r1 = length
    @ Out:     r0 = result
    @ Scratch: r2 (running maximum), r3, q11-q14, flags
    DEFINE_FUNCTION WebRtcSpl_MaxValueW32Neon
      mov r2, #0x80000000         @ Smallest int32: bad-input result and the
                                  @ starting value of the running maximum.
      cmp r0, #0
      beq MAX_W32_DONE            @ NULL vector.
      cmp r1, #1
      blt MAX_W32_DONE            @ length <= 0.

      cmp r1, #8
      blt MAX_W32_TAIL            @ Fewer than 8 elements: scalar loop only.

      vdup.32 q11, r2             @ Two accumulators of four lanes each, every
      vmov q12, q11               @ lane starting at 0x80000000.
      sub r1, r1, #8              @ Bias the count so subs/bge below stops
                                  @ once fewer than 8 elements remain.

    MAX_W32_VECTOR:
      vld1.32 {q13, q14}, [r0]!   @ Fetch 8 elements and advance the pointer.
      vmax.s32 q11, q11, q13
      vmax.s32 q12, q12, q14
      subs r1, r1, #8
      bge MAX_W32_VECTOR

      @ Collapse both accumulators into a single maximum in d24[0].
      vmax.s32 q12, q12, q11
      vmax.s32 d24, d24, d25
      vpmax.s32 d24, d24, d24
      vmov.32 r2, d24[0]
      adds r1, r1, #8             @ Undo the bias: r1 = elements left (0..7).
      beq MAX_W32_DONE

    MAX_W32_TAIL:
      ldr r3, [r0], #4
      cmp r3, r2
      movgt r2, r3                @ Keep the larger value (signed).
      subs r1, r1, #1
      bne MAX_W32_TAIL

    MAX_W32_DONE:
      mov r0, r2
      bx  lr

    @ int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, int length);
    @
    @ Returns the smallest element of vector[0 .. length-1], or 32767 when
    @ vector is NULL or length <= 0.
    @
    @ In:      r0 = vector, r1 = length
    @ Out:     r0 = result
    @ Scratch: r2 (running minimum), r3, q12, q13, flags
    DEFINE_FUNCTION WebRtcSpl_MinValueW16Neon
      movw r2, #0x7FFF            @ Largest int16: bad-input result and the
                                  @ starting value of the running minimum.
      cmp r0, #0
      beq MIN_W16_DONE            @ NULL vector.
      cmp r1, #1
      blt MIN_W16_DONE            @ length <= 0.

      cmp r1, #8
      blt MIN_W16_TAIL            @ Fewer than 8 elements: scalar loop only.

      vdup.16 q12, r2             @ Every lane starts at 0x7FFF.
      sub r1, r1, #8              @ Bias the count so subs/bge below stops
                                  @ once fewer than 8 elements remain.

    MIN_W16_VECTOR:
      vld1.16 {q13}, [r0]!        @ Fetch 8 elements and advance the pointer.
      vmin.s16 q12, q12, q13
      subs r1, r1, #8
      bge MIN_W16_VECTOR

      @ Collapse the 8 lanes of q12 into a single minimum in d24[0].
      vmin.s16 d24, d24, d25
      vpmin.s16 d24, d24, d24
      vpmin.s16 d24, d24, d24
      vmov.s16 r2, d24[0]         @ r2 = vector minimum, sign-extended.
      sxth  r2, r2                @ Keeps r2 a sign-extended halfword.
      adds r1, r1, #8             @ Undo the bias: r1 = elements left (0..7).
      beq MIN_W16_DONE

    MIN_W16_TAIL:
      ldrsh r3, [r0], #2          @ Next element, sign-extended.
      cmp r3, r2
      movle r2, r3                @ Keep the smaller value (signed).
      subs r1, r1, #1
      bne MIN_W16_TAIL

    MIN_W16_DONE:
      mov r0, r2
      bx  lr

    @ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, int length);
    @
    @ Returns the smallest element of vector[0 .. length-1], or 0x7FFFFFFF when
    @ vector is NULL or length <= 0.
    @
    @ In:      r0 = vector, r1 = length
    @ Out:     r0 = result
    @ Scratch: r2 (running minimum), r3, q11-q14, flags
    DEFINE_FUNCTION WebRtcSpl_MinValueW32Neon
      mvn r2, #0x80000000         @ r2 = 0x7FFFFFFF: bad-input result and the
                                  @ starting value of the running minimum.
      cmp r0, #0
      beq MIN_W32_DONE            @ NULL vector.
      cmp r1, #1
      blt MIN_W32_DONE            @ length <= 0.

      cmp r1, #8
      blt MIN_W32_TAIL            @ Fewer than 8 elements: scalar loop only.

      vdup.32 q11, r2             @ Two accumulators of four lanes each, every
      vmov q12, q11               @ lane starting at 0x7FFFFFFF.
      sub r1, r1, #8              @ Bias the count so subs/bge below stops
                                  @ once fewer than 8 elements remain.

    MIN_W32_VECTOR:
      vld1.32 {q13, q14}, [r0]!   @ Fetch 8 elements and advance the pointer.
      vmin.s32 q11, q11, q13
      vmin.s32 q12, q12, q14
      subs r1, r1, #8
      bge MIN_W32_VECTOR

      @ Collapse both accumulators into a single minimum in d24[0].
      vmin.s32 q12, q12, q11
      vmin.s32 d24, d24, d25
      vpmin.s32 d24, d24, d24
      vmov.32 r2, d24[0]
      adds r1, r1, #8             @ Undo the bias: r1 = elements left (0..7).
      beq MIN_W32_DONE

    MIN_W32_TAIL:
      ldr r3, [r0], #4
      cmp r3, r2
      movle r2, r3                @ Keep the smaller value (signed).
      subs r1, r1, #1
      bne MIN_W32_TAIL

    MIN_W32_DONE:
      mov r0, r2
      bx  lr