Home | History | Annotate | Download | only in ARMV7
      1 @/*
      2 @ ** Copyright 2003-2010, VisualOn, Inc.
      3 @ **
      4 @ ** Licensed under the Apache License, Version 2.0 (the "License");
      5 @ ** you may not use this file except in compliance with the License.
      6 @ ** You may obtain a copy of the License at
      7 @ **
      8 @ **     http://www.apache.org/licenses/LICENSE-2.0
      9 @ **
     10 @ ** Unless required by applicable law or agreed to in writing, software
     11 @ ** distributed under the License is distributed on an "AS IS" BASIS,
     12 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ ** See the License for the specific language governing permissions and
     14 @ ** limitations under the License.
     15 @ */
     16 @
     17 @**********************************************************************/
     18 @void Syn_filt_32(
     19 @     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients */
     20 @     Word16 m,                             /* (i)     : order of LP filter             */
     21 @     Word16 exc[],                         /* (i) Qnew: excitation (exc[i] >> Qnew)    */
     22 @     Word16 Qnew,                          /* (i)     : exc scaling = 0(min) to 8(max) */
     23 @     Word16 sig_hi[],                      /* (o) /16 : synthesis high                 */
     24 @     Word16 sig_lo[],                      /* (o) /16 : synthesis low                  */
     25 @     Word16 lg                             /* (i)     : size of filtering              */
     26 @)
     27 @***********************************************************************
     28 @ a[]      --- r0
     29 @ m        --- r1
     30 @ exc[]    --- r2
     31 @ Qnew     --- r3
     32 @ sig_hi[] --- r4
     33 @ sig_lo[] --- r5
     34 @ lg       --- passed on the stack, never read (r6 holds Aq[0]/exc[i]; the loop is fixed at 64 samples)
     35 
     36           .section  .text
     37           .global   Syn_filt_32_asm
     38 
     @-----------------------------------------------------------------------
     @ Syn_filt_32_asm -- NEON synthesis filter keeping the output as a
     @                    sig_hi / sig_lo pair of 16-bit arrays.
     @
     @ C-equivalent signature:
     @   void Syn_filt_32(Word16 a[], Word16 m, Word16 exc[], Word16 Qnew,
     @                    Word16 sig_hi[], Word16 sig_lo[], Word16 lg)
     @
     @ In:    r0        = a[]      (a[0] and a[1]..a[16] are read)
     @        r1        = m        (not read: the code always uses 16 taps)
     @        r2        = exc[]    (64 halfwords are read)
     @        r3        = Qnew     (a0 = a[0] >> (4 + Qnew))
     @        [sp, #0]  = sig_hi[] (reads [-16..-1], writes [0..63])
     @        [sp, #4]  = sig_lo[] (reads [-16..-1], writes [0..63])
     @        [sp, #8]  = lg       (not read: the loop always runs 64 times)
     @
     @ Per output sample i, with j = 1..16:
     @   sum_lo = SUM sig_lo[i - j] * a[j]
     @   sum_hi = SUM sig_hi[i - j] * a[j]
     @   L_tmp  = ((exc[i] * a0) << 1) + ((-sum_lo) >> 11) - (sum_hi << 1)
     @   L_tmp <<= 3
     @   sig_hi[i] = L_tmp >> 16
     @   sig_lo[i] = (L_tmp >> 4) - (sig_hi[i] << 12)     (low 16 bits stored)
     @
     @ Preserved: r4-r11, lr (STMFD/LDMFD) and d8-d11 (VPUSH/VPOP).
     @ Clobbered: r0, r2, r3, r12, d0-d7, q10-q12, d28-d29, q15, flags.
     @-----------------------------------------------------------------------
     Syn_filt_32_asm:

               STMFD         r13!, {r4 - r12, r14}            @ 10 core registers = 40 bytes
               VPUSH         {D8 - D11}                       @ d8-d11 are callee-saved and used below = 32 bytes
               LDR           r4,  [r13, #72]                  @ sig_hi[] address (stack arg 0, above 40 + 32 saved bytes)
               LDR           r5,  [r13, #76]                  @ sig_lo[] address (stack arg 1)

               LDRSH         r6,  [r0], #2                    @ load Aq[0], r0 now points at a[1]
               ADD           r7,  r3, #4                      @ 4 + Q_new
               MOV           r3, r6, ASR r7                   @ a0 = Aq[0] >> (4 + Q_new)

               SUB           r10, r4, #32                     @ sig_hi[-16] address (16 halfwords back)
               SUB           r11, r5, #32                     @ sig_lo[-16] address

               VLD1.S16      {D0, D1, D2, D3}, [r0]!          @ a[1] ~ a[16]

               MOV           r8, #0                           @ i = 0

               VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ sig_hi[-16] ~ sig_hi[-1]
               @ Reverse the four halfwords inside each coefficient register so
               @ that D3:D2:D1:D0 holds a[16] .. a[1], lining up lane-for-lane
               @ with the oldest-first history windows in D4-D7 and D8-D11.
               VREV64.16     D0, D0
               VREV64.16     D1, D1
               VLD1.S16      {D8, D9, D10, D11}, [r11]!       @ sig_lo[-16] ~ sig_lo[-1]
               VREV64.16     D2, D2
               VREV64.16     D3, D3
               VDUP.S32      Q15, r8                          @ Q15 = 0, used to negate the sig_lo sum

     SYN_LOOP:

               LDRSH         r6, [r2], #2                     @ exc[i]

               @ L_tmp = L_msu(L_tmp, sig_lo[i - j], a[j])
               @ Each VEXT slides a history register down by one halfword once
               @ its current contents have been consumed by a multiply.
               VMULL.S16     Q10, D8, D3
               VEXT.8        D8, D8, D9, #2
               VMLAL.S16     Q10, D9, D2
               VMLAL.S16     Q10, D10, D1
               VMLAL.S16     Q10, D11, D0

               VEXT.8        D9, D9, D10, #2
               VEXT.8        D10, D10, D11, #2

               VPADD.S32     D28, D20, D21                    @ fold the 4 partial sums ...
               MUL           r12, r6, r3                      @ exc[i] * a0
               VPADD.S32     D29, D28, D28                    @ ... down to one 32-bit sum
               VDUP.S32      Q10, D29[0]                      @ result1 = sig_lo sum

               VMULL.S16     Q11, D4, D3
               VMLAL.S16     Q11, D5, D2
               VSUB.S32      Q10, Q15, Q10                    @ result1 = -result1

               @ L_tmp = L_msu(L_tmp, sig_hi[i - j], a[j])
               VMLAL.S16     Q11, D6, D1
               VEXT.8        D4, D4, D5, #2
               VMLAL.S16     Q11, D7, D0

               VEXT.8        D5, D5, D6, #2
               VEXT.8        D6, D6, D7, #2

               VPADD.S32     D28, D22, D23
               VPADD.S32     D29, D28, D28
               MOV           r14, r12, LSL #1                 @ (exc[i] * a0) << 1
               VDUP.S32      Q11, D29[0]                      @ result2 = sig_hi sum

               VSHR.S32      Q10, Q10, #11                    @ result1 >>= 11
               VSHL.S32      Q11, Q11, #1                     @ result2 <<= 1
               VDUP.S32      Q12, r14
               VADD.S32      Q12, Q12, Q10                    @ L_tmp = (exc[i]*a0 << 1) + result1 (already negated)
               VSUB.S32      Q12, Q12, Q11                    @ L_tmp -= result2

               VSHL.S32      Q12, Q12, #3                     @ L_tmp <<= 3

               VSHRN.S32     D20, Q12, #16                    @ sig_hi[i] = L_tmp >> 16
               VMOV.S16      r10, D20[0]                      @ r10 = sig_hi[i], sign-extended
               VSHR.S32      Q12, Q12, #4                     @ L_tmp >>= 4
               VEXT.8        D7, D7, D20, #2                  @ append sig_hi[i] to the sig_hi history
               STRH          r10, [r4], #2                    @ store sig_hi[i]
               VMOV.S32      r11, D24[0]                      @ r11 = L_tmp >> 4
               ADD           r8, r8, #1                       @ i++
               SUB           r12, r11, r10, LSL #12           @ sig_lo[i] = L_tmp - (sig_hi[i] << 12)
               VDUP.S16      D21, r12
               VEXT.8        D11, D11, D21, #2                @ append sig_lo[i] to the sig_lo history
               STRH          r12, [r5], #2                    @ store sig_lo[i]

               CMP           r8, #64                          @ fixed frame length: 64 samples
               BLT           SYN_LOOP

     Syn_filt_32_end:

               VPOP          {D8 - D11}                       @ restore the caller's d8-d11
               LDMFD         r13!, {r4 - r12, r15}            @ restore core registers and return (pc <- saved lr)
    130           @ENDFUNC
    131           .end
    132 
    133 
    134