Home | History | Annotate | Download | only in aecm
      1 @
      2 @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3 @
      4 @ Use of this source code is governed by a BSD-style license
      5 @ that can be found in the LICENSE file in the root of the source
      6 @ tree. An additional intellectual property rights grant can be found
      7 @ in the file PATENTS.  All contributing project authors may
      8 @ be found in the AUTHORS file in the root of the source tree.
      9 @
     10 
     11 @ aecm_core_neon.s
     12 @ This file contains some functions in AECM, optimized for ARM Neon
     13 @ platforms. Reference C code is in file aecm_core.c. Bit-exact.
     14 
     15 #include "aecm_core_neon_offsets.h"
     16 #include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
     17 #include "webrtc/system_wrappers/interface/asm_defines.h"
     18 
     19 GLOBAL_LABEL WebRtcAecm_kSqrtHanning         @ Data table defined at the end of this file.
     20 GLOBAL_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon   @ Presumably exports the symbol (macro not visible here).
     21 GLOBAL_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
     22 GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
     22 GLOBAL_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
     23 
     24 @ void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore_t* aecm,
     25 @                                        const uint16_t* far_spectrum,
     26 @                                        int32_t* echo_est,
     27 @                                        uint32_t* far_energy,
     28 @                                        uint32_t* echo_energy_adapt,
     29 @                                        uint32_t* echo_energy_stored);
        @
        @ Handles 8 * (PART_LEN / 8) elements in the NEON loop and one trailing
        @ element in scalar code. For every handled index i:
        @   echo_est[i]          = aecm->channelStored[i] * far_spectrum[i]
        @   *far_energy         += far_spectrum[i]
        @   *echo_energy_adapt  += aecm->channelAdapt16[i] * far_spectrum[i]
        @   *echo_energy_stored += echo_est[i]
        @ The three sums start from zero: the output words are overwritten, their
        @ previous contents are not read.
        @
        @ Arguments: r0 = aecm, r1 = far_spectrum, r2 = echo_est, r3 = far_energy;
        @ echo_energy_adapt and echo_energy_stored are read from the stack
        @ ([sp, #16] and [sp, #20] after the push below).
        @ Clobbers r0-r3, r12, q0-q2, q8-q14 and the condition flags.
        @ The :128 / :256 hints in the loop require channelStored and
        @ channelAdapt16 to be 16-byte aligned and echo_est 32-byte aligned.
        @ NOTE(review): the loop multiplies channelStored as unsigned (vmull.u16)
        @ while the tail uses the sign-extended ldrsh value; both agree only for
        @ non-negative channel values - confirm against aecm_core.c.
     30 .align 2
     31 DEFINE_FUNCTION WebRtcAecm_CalcLinearEnergiesNeon
     32   push {r4-r7}                               @ Save r4-r7; stack arguments are now at sp+16.
     33 
     34   vmov.i32 q14, #0                           @ q14 = far_energy partial sums.
     35   vmov.i32 q8,  #0                           @ q8  = echo_energy_stored partial sums.
     36   vmov.i32 q9,  #0                           @ q9  = echo_energy_adapt partial sums.
     37 
     38   movw r7, #offset_aecm_channelStored        @ Offset of the channelStored pointer in *aecm.
     39   movw r5, #offset_aecm_channelAdapt16       @ Offset of the channelAdapt16 pointer in *aecm.
     40 
     41   mov r4, r2                                 @ r4 = echo_est write pointer.
     42   mov r12, #(PART_LEN / 8)                   @  Loop counter, unrolled by 8.
     43   ldr r6, [r0, r7]                           @ r6 = aecm->channelStored
     44   ldr r7, [r0, r5]                           @ r7 = aecm->channelAdapt16
     45 
     46 LOOP_CALC_LINEAR_ENERGIES:
     47   vld1.16 {d26, d27}, [r1]!                  @ q13 = far_spectrum[i..i+7]
     48   vld1.16 {d24, d25}, [r6, :128]!            @ q12 = aecm->channelStored[i..i+7]
     49   vld1.16 {d0, d1}, [r7, :128]!              @ q0  = aecm->channelAdapt16[i..i+7]
     50   vaddw.u16 q14, q14, d26                    @ far_energy sums += far_spectrum[i..i+3]
     51   vmull.u16 q10, d26, d24                    @ q10 = far_spectrum * channelStored, low 4
     52   vmull.u16 q11, d27, d25                    @ q11 = far_spectrum * channelStored, high 4
     53   vaddw.u16 q14, q14, d27                    @ far_energy sums += far_spectrum[i+4..i+7]
     54   vmull.u16 q1, d26, d0                      @ q1 = far_spectrum * channelAdapt16, low 4
     55   vst1.32 {q10, q11}, [r4, :256]!            @ echo_est[i..i+7] = q10:q11
     56   vadd.u32 q8, q10                           @ echo_energy_stored sums += q10
     57   vmull.u16 q2, d27, d1                      @ q2 = far_spectrum * channelAdapt16, high 4
     58   vadd.u32 q8, q11                           @ echo_energy_stored sums += q11
     59   vadd.u32 q9, q1                            @ echo_energy_adapt sums += q1
     60   subs r12, #1                               @ Count down; sets the flags tested by bgt.
     61   vadd.u32 q9, q2                            @ echo_energy_adapt sums += q2 (flags untouched)
     62   bgt LOOP_CALC_LINEAR_ENERGIES              @ Repeat while the counter is above zero.
     63 
     64   vadd.u32 d28, d29                          @ Fold the four far_energy lanes into two,
     65   vpadd.u32 d28, d28                         @ then into one: d28[0] = lane total.
     66   vmov.32 r12, d28[0]                        @ r12 = far_energy from the vector loop.
     67   vadd.u32 d18, d19                          @ Same reduction for echo_energy_adapt (q9).
     68   vpadd.u32 d18, d18
     69   vmov.32 r5, d18[0]                         @ echo_energy_adapt_r
     70   vadd.u32 d16, d17                          @ Same reduction for echo_energy_stored (q8).
     71   vpadd.u32 d16, d16
     72 
     73   ldrh  r1, [r1]                             @ far_spectrum[i], the element after the loop
     74   add r12, r12, r1                           @ Add the trailing element to far_energy.
     75   str r12, [r3]                              @ far_energy
     76   vmov.32 r2, d16[0]                         @ r2 = echo_energy_stored from the vector loop.
     77 
     78   ldrsh r12, [r6]                            @ aecm->channelStored[i] (sign-extended)
     79   ldrh  r6, [r7]                             @ aecm->channelAdapt16[i] (zero-extended)
     80   mul r0, r12, r1                            @ r0 = channelStored[i] * far_spectrum[i]
     81   mla r1, r6, r1, r5                         @ r1 = channelAdapt16[i] * far_spectrum[i] + r5
     82   add r2, r2, r0                             @ r2 = final echo_energy_stored
     83   str r0, [r4]                               @ echo_est[i]
     84   ldr r4, [sp, #20]                          @ &echo_energy_stored
     85   str r2, [r4]                               @ *echo_energy_stored = r2
     86   ldr r3, [sp, #16]                          @ &echo_energy_adapt
     87   str r1, [r3]                               @ *echo_energy_adapt = r1
     88 
     89   pop {r4-r7}                                @ Restore the registers saved on entry.
     90   bx  lr
     91 
     92 @ void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore_t* aecm,
     93 @                                          const uint16_t* far_spectrum,
     94 @                                          int32_t* echo_est);
        @
        @ Copies aecm->channelAdapt16 into aecm->channelStored and rewrites
        @ echo_est[i] = channelAdapt16[i] * far_spectrum[i], for
        @ 8 * (PART_LEN / 8) elements in the NEON loop plus one trailing element
        @ in scalar code.
        @
        @ Arguments: r0 = aecm, r1 = far_spectrum, r2 = echo_est.
        @ Clobbers r0-r3, r12, q10-q13 and the condition flags.
        @ The :128 / :256 hints require channelAdapt16 and channelStored to be
        @ 16-byte aligned and echo_est 32-byte aligned.
        @ NOTE(review): the loop multiplies as unsigned (vmull.u16) while the
        @ tail multiplies the sign-extended ldrsh value; both agree only for
        @ non-negative channel values - confirm against aecm_core.c.
     95 .align 2
     96 DEFINE_FUNCTION WebRtcAecm_StoreAdaptiveChannelNeon
     97   movw r3, #offset_aecm_channelAdapt16       @ Offset of the channelAdapt16 pointer in *aecm.
     98   movw r12, #offset_aecm_channelStored       @ Offset of the channelStored pointer in *aecm.
     99   ldr r3, [r0, r3]                           @ r3 = aecm->channelAdapt16 (source)
    100   ldr r0, [r0, r12]                          @ r0 = aecm->channelStored (destination)
    101   mov r12, #(PART_LEN / 8)                   @ Loop counter, unrolled by 8.
    102 
    103 LOOP_STORE_ADAPTIVE_CHANNEL:
    104   vld1.16 {d24, d25}, [r3, :128]!            @ q12 = aecm->channelAdapt16[i..i+7]
    105   vld1.16 {d26, d27}, [r1]!                  @ q13 = far_spectrum[i..i+7]
    106   vst1.16 {d24, d25}, [r0, :128]!            @ aecm->channelStored[i..i+7] = q12
    107   vmull.u16 q10, d26, d24                    @ q10 = far_spectrum * channelAdapt16, low 4
    108   vmull.u16 q11, d27, d25                    @ q11 = far_spectrum * channelAdapt16, high 4
    109   vst1.16 {q10, q11}, [r2, :256]!            @ echo_est[i..i+7] = q10:q11
    110   subs r12, #1                               @ Count down; sets the flags tested by bgt.
    111   bgt LOOP_STORE_ADAPTIVE_CHANNEL            @ Repeat while the counter is above zero.
    112 
    113   ldrsh  r12, [r3]                           @ Tail: r12 = channelAdapt16[i] (sign-extended)
    114   strh  r12, [r0]                            @ channelStored[i] = low 16 bits of r12
    115   ldrh  r1, [r1]                             @ r1 = far_spectrum[i] (zero-extended)
    116   mul r3, r1, r12                            @ r3 = far_spectrum[i] * channelAdapt16[i]
    117   str r3, [r2]                               @ echo_est[i] = r3
    118 
    119   bx  lr
    120 
    121 @ void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore_t* aecm);
        @
        @ Copies aecm->channelStored into aecm->channelAdapt16 and writes
        @ aecm->channelAdapt32[i] = channelStored[i] << 16 (as a 32-bit word), for
        @ 8 * (PART_LEN / 8) elements in the NEON loop plus one trailing element
        @ in scalar code.
        @
        @ Argument: r0 = aecm.
        @ Clobbers r0-r3, q10-q12 and the condition flags.
        @ The :128 / :256 hints require channelStored and channelAdapt16 to be
        @ 16-byte aligned and channelAdapt32 32-byte aligned.
    122 .align 2
    123 DEFINE_FUNCTION WebRtcAecm_ResetAdaptiveChannelNeon
    124   movw r1, #offset_aecm_channelAdapt16       @ Offsets of the three channel pointers
    125   movw r2, #offset_aecm_channelAdapt32       @ inside *aecm.
    126   movw r3, #offset_aecm_channelStored
    127   ldr r1, [r0, r1]                           @ &aecm->channelAdapt16[0]
    128   ldr r2, [r0, r2]                           @ &aecm->channelAdapt32[0]
    129   ldr r0, [r0, r3]                           @ &aecm->channelStored[0]
    130   mov r3, #(PART_LEN / 8)                    @ Loop counter, unrolled by 8.
    131 
    132 LOOP_RESET_ADAPTIVE_CHANNEL:
    133   vld1.16 {d24, d25}, [r0, :128]!            @ q12 = aecm->channelStored[i..i+7]
    134   subs r3, #1                                @ Count down early; bgt below tests these flags.
    135   vst1.16 {d24, d25}, [r1, :128]!            @ aecm->channelAdapt16[i..i+7] = q12
    136   vshll.s16 q10, d24, #16                    @ q10 = widened channelStored << 16, low 4
    137   vshll.s16 q11, d25, #16                    @ q11 = widened channelStored << 16, high 4
    138   vst1.16 {q10, q11}, [r2, :256]!            @ aecm->channelAdapt32[i..i+7] = q10:q11
    139   bgt LOOP_RESET_ADAPTIVE_CHANNEL            @ NEON ops above leave the subs flags intact.
    140 
    141   ldrh  r0, [r0]                             @ Tail: r0 = channelStored[i] (zero-extended)
    142   strh  r0, [r1]                             @ channelAdapt16[i] = channelStored[i]
    143   mov r0, r0, asl #16                        @ Shift discards the upper half, so this
    144   str r0, [r2]                               @ matches the vector result: channelAdapt32[i].
    145 
    146   bx  lr
    147 
    148 @ Square root of Hanning window in Q14.
        @ 65 16-bit entries rising from 0 to 16384 (16384 is 1.0 in Q14).
    149 .align 4                                     @ 16-byte boundary (ARM .align takes a power of two).
    150 WebRtcAecm_kSqrtHanning:
    151 _WebRtcAecm_kSqrtHanning:                    @ Same address; presumably for toolchains that prefix C symbols with an underscore - TODO confirm.
    152   .short 0
    153   .short 399, 798, 1196, 1594, 1990, 2386, 2780, 3172
    154   .short 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224
    155   .short 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040
    156   .short 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514
    157   .short 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553
    158   .short 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079
    159   .short 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034
    160   .short 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
    161 
    162 @ Square root of Hanning window in Q14. Compared to WebRtcAecm_kSqrtHanning,
    163 @ the order was reversed and one element (0) was removed.
        @ 64 16-bit entries falling from 16384 to 399.
        @ NOTE(review): no export declaration for this label and no use of it
        @ appear in the part of the file reviewed here - confirm it is still needed.
    164 .align 4                                     @ 16-byte boundary (ARM .align takes a power of two).
    165 kSqrtHanningReversed:
    166   .short 16384, 16373, 16354, 16325, 16286, 16237, 16179, 16111, 16034, 15947
    167   .short 15851, 15746, 15631, 15506, 15373, 15231, 15079, 14918, 14749, 14571
    168   .short 14384, 14189, 13985, 13773, 13553, 13325, 13089, 12845, 12594, 12335
    169   .short 12068, 11795, 11514, 11227, 10933, 10633, 10326, 10013, 9695, 9370
    170   .short 9040, 8705, 8364, 8019, 7668, 7313, 6954, 6591, 6224, 5853, 5478, 5101
    171   .short 4720, 4337, 3951, 3562, 3172, 2780, 2386, 1990, 1594, 1196, 798, 399
    172