@ Home | History | Annotate | Download | only in ARMV7  (code-browser header, kept as comment)
      1 @/*
      2 @ ** Copyright 2003-2010, VisualOn, Inc.
      3 @ **
      4 @ ** Licensed under the Apache License, Version 2.0 (the "License");
      5 @ ** you may not use this file except in compliance with the License.
      6 @ ** You may obtain a copy of the License at
      7 @ **
      8 @ **     http://www.apache.org/licenses/LICENSE-2.0
      9 @ **
     10 @ ** Unless required by applicable law or agreed to in writing, software
     11 @ ** distributed under the License is distributed on an "AS IS" BASIS,
     12 @ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13 @ ** See the License for the specific language governing permissions and
     14 @ ** limitations under the License.
     15 @ */
     16 @
     17 @static void Norm_Corr (Word16 exc[],                    /* (i)     : excitation buffer          */
     18 @                       Word16 xn[],                     /* (i)     : target vector              */
     19 @                       Word16 h[],                      /* (i) Q15 : impulse response of synth/wgt filters */
     20 @                       Word16 L_subfr,                  /* (i)     : sub-frame length */
     21 @                       Word16 t_min,                    /* (i)     : minimum value of pitch lag.   */
     22 @                       Word16 t_max,                    /* (i)     : maximum value of pitch lag.   */
     23 @                       Word16 corr_norm[])              /* (o) Q15 : normalized correlation    */
     24 @
     25 
     26 @ r0 --- exc[]
     27 @ r1 --- xn[]
     28 @ r2 --- h[]
     29 @ r3 --- L_subfr
     30 @ r4 --- t_min
     31 @ r5 --- t_max
     32 @ r6 --- corr_norm[]
     33 
     34 
     35 	.section  .text
     36         .global    Norm_corr_asm
     37         .extern    Convolve_asm
     38         .extern    Isqrt_n
     39 @******************************
     40 @ constant
     41 @******************************
@ Frame-layout offsets, relative to sp after the prologue has pushed the
@ ten callee-saved registers (40 bytes) and allocated the voSTACK-byte
@ local frame:
@   EXC/XN/H/L_SUBFR (0..12) index the {r0-r3} block that is pushed with
@   STMFD around the Convolve_asm call;
@   T_MIN/T_MAX/CORR_NORM are the caller's 5th..7th arguments, which sit
@   just above the saved registers: voSTACK + 40 = 212 bytes up.
.equ    EXC               , 0
.equ    XN                , 4
.equ    H                 , 8
.equ    L_SUBFR           , 12
.equ    voSTACK           , 172
.equ    T_MIN             , 212
.equ    T_MAX             , 216
.equ    CORR_NORM         , 220
     50 
     50 
     51 Norm_corr_asm:
     52 
@ Norm_corr_asm(exc /*r0*/, xn /*r1*/, h /*r2*/, L_subfr /*r3*/,
@               t_min /*stack*/, t_max /*stack*/, corr_norm[] /*stack*/)
@
@ For every integer pitch lag t in [t_min, t_max], computes the Q15
@ normalized correlation between target xn[] and the filtered past
@ excitation, and stores it in corr_norm[t].  The unrolled NEON code
@ below processes exactly 64 samples per vector pass and the in-place
@ update loop uses i = 63..0, so the sub-frame length is effectively
@ hard-wired to 64 regardless of r3.
@
@ Prologue: save r4-r12,lr and carve the 172-byte local frame.  The
@ caller's stack arguments are then at sp+T_MIN / sp+T_MAX / sp+CORR_NORM.
     53         STMFD          r13!, {r4 - r12, r14}
     54         SUB            r13, r13, #voSTACK
     55 
@ excf[64] (128 bytes) lives in the local frame at sp+20.
     56         ADD            r8, r13, #20                 @get the excf[L_SUBFR]
     57         LDR            r4, [r13, #T_MIN]            @get t_min
     58         RSB            r11, r4, #0                  @k = -t_min
     59         ADD            r5, r0, r11, LSL #1          @get the &exc[k]
     60 
@ excf[] = conv(&exc[-t_min], h): Convolve_asm(&exc[-t_min], h, excf,
@ L_subfr).  r0-r3 are saved/restored so the original arguments stay
@ live after the call.
     61         @transfer Convolve function
     62         STMFD          sp!, {r0 - r3}
     63         MOV            r0, r5
     64         MOV            r1, r2
     65         MOV            r2, r8                       @r2 --- excf[]
     66         BL             Convolve_asm
     67         LDMFD          sp!, {r0 - r3}
     68 
     69         @ r8 --- excf[]
     70 
@ --- Target energy: L_tmp = sum(xn[i]*xn[i], i = 0..63) -----------------
@ 64 halfwords of xn[] are loaded into Q0-Q7; the squares accumulate
@ into the four 32-bit lanes of Q10.
	MOV            r14, r1                       @copy xn[] address
        MOV            r7, #1
	VLD1.S16       {Q0, Q1}, [r14]!
	VLD1.S16       {Q2, Q3}, [r14]!
	VLD1.S16       {Q4, Q5}, [r14]!
	VLD1.S16       {Q6, Q7}, [r14]!
     77 
        VMULL.S16      Q10, D0, D0
        VMLAL.S16      Q10, D1, D1
        VMLAL.S16      Q10, D2, D2
        VMLAL.S16      Q10, D3, D3
        VMLAL.S16      Q10, D4, D4
        VMLAL.S16      Q10, D5, D5
        VMLAL.S16      Q10, D6, D6
        VMLAL.S16      Q10, D7, D7
        VMLAL.S16      Q10, D8, D8
        VMLAL.S16      Q10, D9, D9
	VMLAL.S16      Q10, D10, D10
	VMLAL.S16      Q10, D11, D11
	VMLAL.S16      Q10, D12, D12
	VMLAL.S16      Q10, D13, D13
	VMLAL.S16      Q10, D14, D14
	VMLAL.S16      Q10, D15, D15
     94 
@ Reduce the four partial sums to a scalar with saturating adds, double
@ it (L_mult semantics) and add 1 so norm_l never sees zero.
        VQADD.S32      D20, D20, D21
        VMOV.S32       r9,  D20[0]
        VMOV.S32       r10, D20[1]
        QADD           r6, r9, r10
	QADD           r6, r6, r6
        QADD           r9, r6, r7                   @L_tmp = (L_tmp << 1) + 1;
@ scale = -((32 - norm_l(L_tmp)) >> 1): a global headroom factor applied
@ to every corr_norm[t] below (r7 stays live across the whole lag loop).
	CLZ            r7, r9
	SUB            r6, r7, #1                   @exp = norm_l(L_tmp)
        RSB            r7, r6, #32                  @exp = 32 - exp
	MOV            r6, r7, ASR #1
	RSB            r7, r6, #0                   @scale = -(exp >> 1)
    106 
        @loop for every possible period
	@for(t = t_min@ t <= t_max@ t++)
	@r7 --- scale r4 --- t_min r8 --- excf[]
    110 
@ =======================================================================
@ Per-lag loop.  On entry to each iteration: r4 = current lag t,
@ r1 = xn[], sp+20 = excf[] for this lag, r7 = scale.
@ r8 is (re)loaded with 0x8000, the rounding constant for vo_round.
@ =======================================================================
LOOPFOR:
	ADD            r14, r13, #20                @copy of excf[]
	MOV            r12, r1                      @copy of xn[]
	MOV            r8, #0x8000
    115 
@ First 32 samples: Q10 accumulates excf[i]^2 (energy, "L_tmp1"),
@ Q11 accumulates xn[i]*excf[i] (cross-correlation, "L_tmp").
        VLD1.S16       {Q0, Q1}, [r14]!                 @ load 16 excf[]
        VLD1.S16       {Q2, Q3}, [r14]!                 @ load 16 excf[]
        VLD1.S16       {Q4, Q5}, [r12]!                 @ load 16 x[]
	VLD1.S16       {Q6, Q7}, [r12]!                 @ load 16 x[]
        VMULL.S16    Q10, D0, D0                      @L_tmp1 += excf[] * excf[]
        VMULL.S16    Q11, D0, D8                      @L_tmp  += x[] * excf[]
        VMLAL.S16    Q10, D1, D1
        VMLAL.S16    Q11, D1, D9
        VMLAL.S16    Q10, D2, D2
        VMLAL.S16    Q11, D2, D10
        VMLAL.S16    Q10, D3, D3
        VMLAL.S16    Q11, D3, D11
        VMLAL.S16    Q10, D4, D4
        VMLAL.S16    Q11, D4, D12
        VMLAL.S16    Q10, D5, D5
        VMLAL.S16    Q11, D5, D13
        VMLAL.S16    Q10, D6, D6
        VMLAL.S16    Q11, D6, D14
        VMLAL.S16    Q10, D7, D7
        VMLAL.S16    Q11, D7, D15
    136 
@ Remaining 32 samples, same accumulators.
	VLD1.S16       {Q0, Q1}, [r14]!                 @ load 16 excf[]
        VLD1.S16       {Q2, Q3}, [r14]!                 @ load 16 excf[]
        VLD1.S16       {Q4, Q5}, [r12]!                 @ load 16 x[]
        VLD1.S16       {Q6, Q7}, [r12]!                 @ load 16 x[]
        VMLAL.S16    Q10, D0, D0
        VMLAL.S16    Q11, D0, D8
        VMLAL.S16    Q10, D1, D1
        VMLAL.S16    Q11, D1, D9
        VMLAL.S16    Q10, D2, D2
        VMLAL.S16    Q11, D2, D10
        VMLAL.S16    Q10, D3, D3
        VMLAL.S16    Q11, D3, D11
        VMLAL.S16    Q10, D4, D4
        VMLAL.S16    Q11, D4, D12
        VMLAL.S16    Q10, D5, D5
        VMLAL.S16    Q11, D5, D13
        VMLAL.S16    Q10, D6, D6
        VMLAL.S16    Q11, D6, D14
        VMLAL.S16    Q10, D7, D7
        VMLAL.S16    Q11, D7, D15
    157 
@ Horizontal reduction of both accumulators to scalars.
        VQADD.S32      D20, D20, D21
        VQADD.S32      D22, D22, D23
    160 
	VPADD.S32      D20, D20, D20                   @D20[0] --- L_tmp1 << 1
	VPADD.S32      D22, D22, D22                   @D22[0] --- L_tmp << 1
    163 
	VMOV.S32       r6, D20[0]
        VMOV.S32       r5, D22[0]
    166 
	@r5 --- L_tmp, r6 --- L_tmp1
	MOV            r10, #1
	ADD            r5, r10, r5, LSL #1                     @L_tmp = (L_tmp << 1) + 1
	ADD            r6, r10, r6, LSL #1                     @L_tmp1 = (L_tmp1 << 1) + 1
    171 
@ norm_l(L_tmp) via CLZ; the conditional path handles a negative L_tmp
@ by counting leading zeros of its negation.
	CLZ            r10, r5
	CMP            r5, #0
	RSBLT          r11, r5, #0
	CLZLT          r10, r11
	SUB            r10, r10, #1                 @exp = norm_l(L_tmp)
    177 
	MOV            r5, r5, LSL r10              @L_tmp = (L_tmp << exp)
	RSB            r10, r10, #30                @exp_corr = 30 - exp
	MOV            r11, r5, ASR #16             @corr = extract_h(L_tmp)
    181 
@ Normalize the energy the same way (L_tmp1 is (sum<<1)+1 > 0, so the
@ unconditional CLZ path suffices here).
	CLZ            r5, r6
	SUB            r5, r5, #1
	MOV            r6, r6, LSL r5               @L_tmp = (L_tmp1 << exp)
	RSB            r5, r5, #30                  @exp_norm = 30 - exp
    186 
	@r10 --- exp_corr, r11 --- corr
	@r6  --- L_tmp, r5 --- exp_norm
    189 
	@Isqrt_n(&L_tmp, &exp_norm)
    191 
@ Call Isqrt_n with pointers into the stack: the Word32 at [sp] and the
@ Word16 at [sp+4] deliberately reuse the just-pushed r0/r1 save slots
@ as scratch; r14/r12 keep copies of r0/r1 and restore them afterwards.
	MOV            r14, r0
	MOV            r12, r1
    194 
        STMFD          sp!, {r0 - r4, r7 - r12, r14}
	ADD            r1, sp, #4
	ADD            r0, sp, #0
	STR            r6, [sp]
	STRH           r5, [sp, #4]
	BL             Isqrt_n
	LDR            r6, [sp]
	LDRSH          r5, [sp, #4]
        LDMFD          sp!, {r0 - r4, r7 - r12, r14}
	MOV            r0, r14
	MOV            r1, r12
    206 
    207 
@ corr_norm[t] = round((corr * norm << 1) shifted by exp_corr+exp_norm+scale)
	MOV            r6, r6, ASR #16              @norm = extract_h(L_tmp)
	MUL            r12, r6, r11
	ADD            r12, r12, r12                @L_tmp = vo_L_mult(corr, norm)
    211 
	ADD            r6, r10, r5
	ADD            r6, r6, r7                   @exp_corr + exp_norm + scale
    214 
@ L_shl with signed shift count: negative -> arithmetic right shift,
@ positive -> left shift, zero -> no shift (neither conditional fires).
        CMP            r6, #0
        RSBLT          r6, r6, #0
	MOVLT          r12, r12, ASR r6
        MOVGT          r12, r12, LSL r6             @L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)
    219 
        ADD            r12, r12, r8                 @ + 0x8000 (r8), then >>16: vo_round
        MOV            r12, r12, ASR #16            @vo_round(L_tmp)
    222 
        LDR            r5, [r13, #CORR_NORM]        @ get corr_norm address
	LDR            r6, [r13, #T_MAX]            @ get t_max
	ADD            r10, r5, r4, LSL #1          @ get corr_norm[t] address
	STRH           r12, [r10]                   @ corr_norm[t] = vo_round(L_tmp)
    227 
	CMP            r4, r6
	BEQ            Norm_corr_asm_end
    230 
@ --- Prepare excf[] for lag t+1 (in place, high index to low) ----------
@ tmp = exc[-(t+1)]; for i = 63..1: excf[i] = ((tmp*h[i]) >> 15) + excf[i-1];
@ finally excf[0] = (tmp*h[0]) >> 15.
	ADD            r4, r4, #1                   @ t_min ++
	RSB            r5, r4, #0                   @ k
    233 
	MOV            r6, #63                      @ i = 63
	MOV            r8, r0                       @ exc[]
	MOV            r9, r2                       @ h[]
	ADD            r10, r13, #20                @ excf[]
    238 
	ADD            r8, r8, r5, LSL #1           @ exc[k] address
	ADD            r9, r9, r6, LSL #1           @ h[i] address
	ADD            r10, r10, r6, LSL #1         @ excf[i] address
	LDRSH          r11, [r8]                    @ tmp = exc[k]
    243 
LOOPK:
        LDRSH          r8, [r9], #-2                @ load h[i], then step h pointer down
	LDRSH          r12, [r10, #-2]              @ load excf[i - 1]
	MUL            r14, r11, r8
	MOV            r8, r14, ASR #15             @ (tmp * h[i]) >> 15
	ADD            r14, r8, r12
	STRH           r14, [r10], #-2              @ store excf[i], step down
	SUBS           r6, r6, #1
	BGT            LOOPK
    253 
@ i == 0 tail: excf[0] has no excf[-1] term.
	LDRSH          r8, [r9]                     @ load h[0]
	MUL            r14, r11, r8
        LDR            r6, [r13, #T_MAX]            @ get t_max
	MOV            r8, r14, ASR #15
	STRH           r8, [r10]
    259 
	CMP            r4, r6
	BLE            LOOPFOR
    262 
Norm_corr_asm_end:
    264 
@ Epilogue: release the local frame, restore r4-r12 and return by
@ loading the saved lr directly into pc.
        ADD            r13, r13, #voSTACK
        LDMFD          r13!, {r4 - r12, r15}
    267 
    268         .END
    269 
    270 
    271