Home | History | Annotate | Download | only in source
      1 @
      2 @ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3 @
      4 @ Use of this source code is governed by a BSD-style license
      5 @ that can be found in the LICENSE file in the root of the source
      6 @ tree. An additional intellectual property rights grant can be found
      7 @ in the file PATENTS.  All contributing project authors may
      8 @ be found in the AUTHORS file in the root of the source tree.
      9 @
     10 
     11 @ Contains the function WebRtcIsacfix_CalculateResidualEnergyNeon() of the
     12 @ iSAC codec, optimized for the ARM NEON platform. The reference C code is
     13 @ in lpc_masking_model.c.
     14 
     15 .arch armv7-a
     16 .fpu neon
     17 .global WebRtcIsacfix_CalculateResidualEnergyNeon
     18 .align  2
     19 
     20 @ int32_t WebRtcIsacfix_CalculateResidualEnergyNeon(int lpc_order,
     21 @                                                   int32_t q_val_corr,
     22 @                                                   int q_val_polynomial,
     23 @                                                   int16_t* a_polynomial,
     24 @                                                   int32_t* corr_coeffs,
     25 @                                                   int* q_val_residual_energy);
        @
        @ Purpose: accumulates, for i in [0, lpc_order] and j in [i, lpc_order],
        @   a_polynomial[j] * a_polynomial[j - i] * corr_coeffs[i]
        @ (doubled when i != 0) into a 64-bit sum, normalizes that sum, returns
        @ its upper 32 bits in r0 and stores the matching Q value through
        @ q_val_residual_energy.
        @
        @ Inputs as used by the code below:
        @   r0 = lpc_order, r1 = q_val_corr, r2 = q_val_polynomial,
        @   r3 = a_polynomial; corr_coeffs and q_val_residual_energy are read
        @   from the caller's stack at [sp, #48] and [sp, #52] after the prologue
        @   (32 bytes of pushed registers + 16 bytes of locals).
        @
        @ Local stack slots: [sp, #4] = &a_polynomial[lpc_order],
        @   [sp, #8] = q_val_corr, [sp, #12] = q_val_polynomial ([sp, #0] unused).
        @
        @ Register roles inside the loops:
        @   r6  = i                      r1  = j (advanced by 2 per inner pass)
        @   r7  = &a_polynomial[i]       r9  = &a_polynomial[lpc_order - i]
        @   r10 = lpc_order - 1 - i      r11 = &corr_coeffs[i] (post-incremented)
        @   q10 = {d20 = 1, d21 = 0}     q11 = shift_internal (only d22 changes)
        @   q13 = sum64, two 64-bit lanes (d26, d27)
        @   q15 = sum64_tmp, two 64-bit lanes; d25 = corr_coeffs[i] in both lanes
        @
        @ NOTE(review): every "vqadd ... ; bvc ..." pair below branches on the
        @ APSR V flag. As far as the ARMv7 manual describes it, VQADD reports
        @ saturation through FPSCR.QC and does not write APSR, so V here comes
        @ from the most recent cmp. If that holds, the overflow fix-up paths are
        @ never taken -- confirm against the ARM ARM and the reference C code.
        @
        @ NOTE(review): q10 is {1, 0}, so "vsub.s64 q11, q10" only decrements d22
        @ and d23 stays 0; "vshl.s64 q0, q11" then scales lane 0 by d22 and
        @ lane 1 by d23. Confirm that leaving lane 1 unscaled is intended.
     26 
     27 WebRtcIsacfix_CalculateResidualEnergyNeon:
     28 .fnstart
     29 .save {r4-r11}
     30   push {r4-r11}
     31 
        @ Reserve 16 bytes of locals and spill the two Q values, because r1 and r2
        @ are reused as scratch registers inside the loops.
     32   sub r13, r13, #16
     33   str r1, [r13, #8]
     34   str r2, [r13, #12]
     35 
     36   mov r4, #1
     37   vmov.s64 q11, #0            @ Initialize shift_internal.
     38   vmov.s64 q13, #0            @ Initialize sum64.
     39   vmov.s64 q10, #0
     40   vmov.u8 d20[0], r4          @ Set d20 to 1 (d21 stays 0).
     41 
        @ A negative lpc_order skips both loops entirely.
     42   cmp r0, #0
     43   blt POST_LOOP_I
     44 
     45   add r9, r3, r0, asl #1      @ &a_polynomial[lpc_order]
     46   mov r6, #0                  @ Loop counter i.
     47   ldr r11, [r13, #48]
     48   sub r10, r0, #1
     49   mov r7, r3                  @ &a_polynomial[0]
     50   str r9, [r13, #4]
     51 
        @ Outer loop: one pass per i in [0, lpc_order].
     52 LOOP_I:
     53   ldr r2, [r11], #4            @ corr_coeffs[i]
     54   vmov.s64 q15, #0            @ Initialize the sum64_tmp.
     55   vdup.s32 d25, r2
     56 
        @ When i >= lpc_order no element pair is left: set r2 = i so that the
        @ "cmp r2, r0" after MOV2 selects the single-element tail iff
        @ i == lpc_order.
     57   cmp r0, r6                  @ Compare lpc_order to i.
     58   movle r2, r6
     59   ble POST_LOOP_J
     60 
     61   mov r1, r6                  @ j = i;
     62   mov r12, r7                  @ &a_polynomial[i]
     63   mov r4, r3                  @ &a_polynomial[j - i]
     64 
        @ Inner loop: handles two consecutive j values per pass. Each 32-bit ldr
        @ fetches a pair of adjacent int16_t coefficients.
        @ NOTE(review): for odd i, r12 is only 2-byte aligned relative to
        @ a_polynomial, so this presumably relies on unaligned ldr being
        @ permitted -- confirm for the target configuration.
     65 LOOP_J:
     66   ldr r8, [r12], #4
     67   ldr r5, [r4], #4
     68   vmov.u32 d0[0], r8
     69   vmov.u32 d1[0], r5
     70   vmull.s16 q0, d0, d1
     71   vmull.s32 q0, d0, d25
     72   cmp r6, #0                  @ i == 0?
     73   vshl.s64 q0, q11
     74   beq SUM1
     75   vshl.s64 q0, #1
     76 
     77 SUM1:
     78   vqadd.s64 q14, q0, q15      @ Sum and test overflow.
     79   add r1, r1, #2
     80   bvc MOV1                    @ Skip the shift if there's no overflow.
        @ Fix-up path: halve both operands, add without saturation, and
        @ decrement shift_internal (d22).
     81   vshr.s64 q0, #1
     82   vshr.s64 q15, #1
     83   vadd.s64 q14, q0, q15
     84   vsub.s64 q11, q10
     85 
     86 MOV1:
     87   cmp r0, r1                  @ Compare lpc_order to j.
     88   vmov.s64 q15, q14
     89   bgt LOOP_J
     90 
        @ r2 = i + 2 + (r10 & ~1). This equals lpc_order exactly when
        @ (lpc_order - i) is even, i.e. when one unpaired element remains.
     91   bic r1, r10, #1
     92   add r2, r6, #2
     93   add r2, r1, r2
     94 
        @ Fold this i's partial sums (sum64_tmp) into sum64.
     95 POST_LOOP_J:
     96   vqadd.s64 q0, q13, q15      @ Sum and test overflow.
     97   bvc MOV2                    @ Skip the shift if there's no overflow.
     98   vshr.s64 q13, #1
     99   vshr.s64 q15, #1
    100   vadd.s64 q0, q13, q15
    101   vsub.s64 q11, q10
    102 
    103 MOV2:
    104   vmov.s64 q13, q0            @ update sum64.
    105   cmp r2, r0
    106   bne CHECK_LOOP_CONDITION
    107 
    108   @ Last sample in the inner loop:
        @ a_polynomial[lpc_order] * a_polynomial[lpc_order - i] * corr_coeffs[i],
        @ accumulated into the low lane of sum64 (d26) only.
    109   ldr r4, [r13, #4]
    110   ldrsh r8, [r4]
    111   ldrsh r12, [r9]
    112   mul r8, r8, r12
    113   vmov.s32 d0[0], r8
    114   vmull.s32 q0, d0, d25
    115   cmp r6, #0                  @ i == 0?
    116   vshl.s64 q0, q11
    117   beq SUM2
    118   vshl.s64 q0, #1
    119 
    120 SUM2:
    121   vqadd.s64 d1, d0, d26       @ Sum and test overflow.
    122   bvc MOV3                    @ Skip the shift if there's no overflow.
    123   vshr.s64 q13, #1
    124   vshr.s64 d0, #1
    125   vadd.s64 d1, d0, d26
    126   vsub.s64 q11, q10
    127 
    128 MOV3:
    129   vmov.s64 d26, d1            @ update sum64.
    130 
        @ Advance to the next i: r9 steps back one int16_t, r7 steps forward one.
        @ The loop repeats while lpc_order >= i.
    131 CHECK_LOOP_CONDITION:
    132   add r6, r6, #1
    133   sub r9, r9, #2
    134   cmp r0, r6                  @ Compare i to lpc_order.
    135   sub r10, r10, #1
    136   add r7, r7, #2
    137   bge LOOP_I
    138 
        @ Combine the two 64-bit lanes of sum64 into d0.
    139 POST_LOOP_I:
    140   mov r3, #0                  @ r3 is overwritten below before being read.
    141   vqadd.s64 d0, d26, d27      @ Sum and test overflow.
    142   bvc GET_SHIFT_NORM          @ Skip the shift if there's no overflow.
    143   vshr.s64 q13, #1
    144   vadd.s64 d0, d26, d27
    145   vsub.s64 q11, q10
    146 
        @ Normalize the 64-bit sum in two steps. Each step counts the redundant
        @ sign bits of the upper 32 bits and shifts the whole 64-bit value left
        @ by that count. r2 keeps the first count, r1 the second.
    147 GET_SHIFT_NORM:
    148   vcls.s32 d1, d0             @ Count leading extra sign bits.
    149   vmov.32 r2, d1[1]           @ Store # of sign bits of only the 32 MSBs.
    150   vmovl.s32 q1, d1
    151   vshl.s64 d0, d3             @ d3 contains # of sign bits of the 32 MSBs.
    152 
    153   vcls.s32 d1, d0             @ Count again the leading extra sign bits.
    154   vmov.s32 r1, d1[1]          @ Store # of sign bits of only the 32 MSBs.
    155   vmovl.s32 q1, d1
    156   vshl.s64 d0, d3             @ d3 contains # of sign bits of the 32 MSBs.
    157 
    158   vmov.s32 r0, d0[1]          @ residual_energy (return value)
    159   vmov.s32 r3, d22[0]         @ shift_internal (low 32 bits of d22)
    160 
    161   @ Calculate the value for q_val_residual_energy:
        @   q_val_corr - 32 + 2 * q_val_polynomial
        @     + second norm count (r1) + first norm count (r2) + shift_internal.
    162   ldr r4, [r13, #8]            @ q_val_corr
    163   ldr r5, [r13, #12]           @ q_val_polynomial
    164   sub r12, r4, #32
    165   add r12, r12, r5, asl #1
    166   add r1, r12, r1              @ add the second normalization shift count.
    167   add r12, r1, r2              @ add the first normalization shift count.
    168   ldr r2, [r13, #52]           @ q_val_residual_energy (output pointer)
    169   add r3, r12, r3              @ value for q_val_residual_energy.
    170   str r3, [r2, #0]
    171 
        @ Epilogue: release the locals, restore r4-r11 and return.
    172   add r13, r13, #16
    173   pop {r4-r11}
    174   bx  r14
    175 
    176 .fnend
    177 
    178