@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS.  All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@

@ Contains the core loop routine for the pitch filter function in iSAC,
@ optimized for ARM platforms. The file is assembled with ".arch armv6"
@ (see below), so it targets ARMv6 and later cores, including ARMv7.
@
@ Output is bit-exact with the reference C code in pitch_filter.c.

     16 #include "settings.h"
     17 
     18 .arch armv6
     19 .align  2
     20 .global WebRtcIsacfix_PitchFilterCore
     21 
     22 
     23 @ void WebRtcIsacfix_PitchFilterCore(int loopNumber,
     24 @                                    WebRtc_Word16 gain,
     25 @                                    int index,
     26 @                                    WebRtc_Word16 sign,
     27 @                                    WebRtc_Word16* inputState,
     28 @                                    WebRtc_Word16* outputBuf2,
     29 @                                    const WebRtc_Word16* coefficient,
     30 @                                    WebRtc_Word16* inputBuf,
     31 @                                    WebRtc_Word16* outputBuf,
     32 @                                    int* index2) {
     33 
     34 WebRtcIsacfix_PitchFilterCore:
     35 .fnstart
     36   push {r4-r11}
     37   sub sp, #8
     38 
     39   str r0, [sp]                @ loopNumber
     40   str r3, [sp, #4]            @ sign
     41   ldr r3, [sp, #44]           @ outputBuf2
     42   ldr r6, [sp, #60]           @ index2
     43   ldr r7, [r6]                @ *index2
     44   ldr r8, [sp, #52]           @ inputBuf
     45   ldr r12, [sp, #56]          @ outputBuf
     46 
     47   add r4, r7, r0
     48   str r4, [r6]                @ Store return value to index2.
     49 
     50   mov r10, r7, asl #1
     51   add r12, r10                @ &outputBuf[*index2]
     52   add r8, r10                 @ &inputBuf[*index2]
     53 
     54   add r4, r7, #PITCH_BUFFSIZE @ *index2 + PITCH_BUFFSIZE
     55   add r6, r3, r4, lsl #1      @ &outputBuf2[*index2 + PITCH_BUFFSIZE]
     56   sub r4, r2                  @ r2: index
     57   sub r4, #2                  @ *index2 + PITCH_BUFFSIZE - index - 2
     58   add r3, r4, lsl #1          @ &ubufQQpos2[*index2]
     59   ldr r9, [sp, #48]           @ coefficient
     60 
     61 LOOP:
     62 @ Usage of registers in the loop:
     63 @  r0: loop counter
     64 @  r1: gain
     65 @  r2: tmpW32
     66 @  r3: &ubufQQpos2[]
     67 @  r6: &outputBuf2[]
     68 @  r8: &inputBuf[]
     69 @  r9: &coefficient[]
     70 @  r12: &outputBuf[]
     71 @  r4, r5, r7, r10, r11: scratch
     72 
     73   @ Filter to get fractional pitch.
     74   @ The pitch filter loop here is unrolled with 9 multipications.
     75   pld [r3]
     76   ldr r10, [r3], #4           @ ubufQQpos2[*index2 + 0, *index2 + 1]
     77   ldr r4, [r9], #4            @ coefficient[0, 1]
     78   ldr r11, [r3], #4
     79   ldr r5, [r9], #4
     80   smuad r2, r10, r4
     81   smlad r2, r11, r5, r2
     82 
     83   ldr r10, [r3], #4
     84   ldr r4, [r9], #4
     85   ldr r11, [r3], #4
     86   ldr r5, [r9], #4
     87   smlad r2, r10, r4, r2
     88   ldrh r10, [r3], #-14        @ r3 back to &ubufQQpos2[*index2].
     89   ldrh  r4, [r9], #-16        @ r9 back to &coefficient[0].
     90   smlad r2, r11, r5, r2
     91   smlabb r2, r10, r4, r2
     92 
     93   @ Saturate to avoid overflow in tmpW16.
     94   asr r2, #1
     95   add r4, r2, #0x1000
     96   ssat r7, #16, r4, asr #13
     97 
     98   @ Shift low pass filter state, and excute the low pass filter.
     99   @ The memmove() and the low pass filter loop are unrolled and mixed.
    100   smulbb r5, r1, r7
    101   add r7, r5, #0x800
    102   asr r7, #12                 @ Get the value for inputState[0].
    103   ldr r11, [sp, #40]          @ inputState
    104   pld [r11]
    105   adr r10, kDampFilter
    106   ldrsh r4, [r10], #2         @ kDampFilter[0]
    107   mul r2, r7, r4
    108   ldr r4, [r11]               @ inputState[0, 1], before shift.
    109   strh r7, [r11]              @ inputState[0], after shift.
    110   ldr r5, [r11, #4]           @ inputState[2, 3], before shift.
    111   ldr r7, [r10], #4           @ kDampFilter[1, 2]
    112   ldr r10, [r10]              @ kDampFilter[3, 4]
    113   str r4, [r11, #2]           @ inputState[1, 2], after shift.
    114   str r5, [r11, #6]           @ inputState[3, 4], after shift.
    115   smlad r2, r4, r7, r2
    116   smlad r2, r5, r10, r2
    117 
    118   @ Saturate to avoid overflow.
    119   @ First shift the sample to the range of [0xC0000000, 0x3FFFFFFF],
    120   @ to avoid overflow in the next saturation step.
    121   asr r2, #1
    122   add r10, r2, #0x2000
    123   ssat r10, #16, r10, asr #14
    124 
    125   @ Subtract from input and update buffer.
    126   ldr r11, [sp, #4]           @ sign
    127   ldrsh r4, [r8]
    128   ldrsh r7, [r8], #2          @ inputBuf[*index2]
    129   smulbb r5, r11, r10
    130   subs r0, #1
    131   sub r4, r5
    132   ssat r2, #16, r4
    133   strh  r2, [r12], #2         @ outputBuf[*index2]
    134 
    135   add r2, r7
    136   ssat r2, #16, r2
    137   strh  r2, [r6], #2          @ outputBuff2[*index2 + PITCH_BUFFSIZE]
    138   bgt LOOP
    139 
    140   add sp, #8
    141   pop {r4-r11}
    142   bx  lr
    143 .fnend
    144 
    145 .align  2
    146 kDampFilter:
    147   .short  -2294, 8192, 20972, 8192, -2294
    148