@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@ ARMv7/NEON implementation of the AMR-WB open-loop pitch helper Norm_Corr:
@ for every candidate pitch lag t in [t_min, t_max] it computes the
@ correlation between the target vector xn[] and the filtered past
@ excitation excf[] (exc convolved with the impulse response h[]),
@ normalized by the energy of excf[], and stores the Q15 result in
@ corr_norm[t].
@
@ Reference C prototype this routine implements:
@
@static void Norm_Corr (Word16 exc[],       /* (i)     : excitation buffer          */
@                       Word16 xn[],        /* (i)     : target vector              */
@                       Word16 h[],         /* (i) Q15 : impulse response of synth/wgt filters */
@                       Word16 L_subfr,     /* (i)     : sub-frame length           */
@                       Word16 t_min,       /* (i)     : minimum value of pitch lag. */
@                       Word16 t_max,       /* (i)     : maximum value of pitch lag. */
@                       Word16 corr_norm[]) /* (o) Q15 : normalized correlation     */
@
@ Argument registers on entry (AAPCS: first 4 in r0-r3, rest on the stack):
@ r0 --- exc[]
@ r1 --- xn[]
@ r2 --- h[]
@ r3 --- L_subfr
@ r4 --- t_min       (loaded from the caller's stack, see T_MIN below)
@ r5 --- t_max       (loaded from the caller's stack, see T_MAX below)
@ r6 --- corr_norm[] (loaded from the caller's stack, see CORR_NORM below)
@
@ NOTE(review): the NEON loops below process exactly 64 int16 samples
@ (4 x 16-element loads), i.e. the code hard-codes L_subfr == 64 and the
@ r3 argument is never read after the prologue — presumably the AMR-WB
@ sub-frame length; confirm against the caller before reusing elsewhere.

          .section   .text
          .global    Norm_corr_asm
          .extern    Convolve_asm
          .extern    Isqrt_n

@******************************
@ constant
@******************************
@ Stack-frame offsets. The prologue pushes 10 registers (40 bytes) and
@ then reserves voSTACK (172) bytes of locals, so the caller's stack
@ arguments sit at 172 + 40 = 212 bytes above the adjusted r13:
@   T_MIN/T_MAX/CORR_NORM are arguments 5-7 of the C prototype.
@ The locals hold the excf[64] working buffer at r13 + 20.
          .equ       EXC       ,   0
          .equ       XN        ,   4
          .equ       H         ,   8
          .equ       L_SUBFR   ,   12
          .equ       voSTACK   ,   172
          .equ       T_MIN     ,   212
          .equ       T_MAX     ,   216
          .equ       CORR_NORM ,   220

Norm_corr_asm:

          STMFD      r13!, {r4 - r12, r14}      @ save callee-saved regs + lr (10 words)
          SUB        r13, r13, #voSTACK         @ reserve 172-byte local frame

          ADD        r8, r13, #20               @ r8 = &excf[0] (local excf[L_SUBFR] buffer)
          LDR        r4, [r13, #T_MIN]          @ get t_min from the caller's stack
          RSB        r11, r4, #0                @ k = -t_min
          ADD        r5, r0, r11, LSL #1        @ r5 = &exc[k]  (int16 elements, hence LSL #1)

          @ Call Convolve_asm(&exc[-t_min], h, excf): fills excf[] with the
          @ past excitation filtered by h[] for the first lag t = t_min.
          @ r0-r3 are caller-saved across the call, so spill and restore them.
          STMFD      sp!, {r0 - r3}
          MOV        r0, r5
          MOV        r1, r2
          MOV        r2, r8                     @ r2 --- excf[]
          BL         Convolve_asm
          LDMFD      sp!, {r0 - r3}

          @ r8 --- excf[]

          @ --- Energy of the target: sum xn[i]^2 over 64 samples, then derive
          @ the fixed-point scale factor used for every lag below.
          MOV        r14, r1                    @ copy xn[] address
          MOV        r7, #1
          VLD1.S16   {Q0, Q1}, [r14]!           @ xn[0..15]
          VLD1.S16   {Q2, Q3}, [r14]!           @ xn[16..31]
          VLD1.S16   {Q4, Q5}, [r14]!           @ xn[32..47]
          VLD1.S16   {Q6, Q7}, [r14]!           @ xn[48..63]

          VMULL.S16  Q10, D0, D0               @ widening multiply-accumulate:
          VMLAL.S16  Q10, D1, D1               @ Q10 accumulates xn[i]*xn[i] in 32 bits
          VMLAL.S16  Q10, D2, D2
          VMLAL.S16  Q10, D3, D3
          VMLAL.S16  Q10, D4, D4
          VMLAL.S16  Q10, D5, D5
          VMLAL.S16  Q10, D6, D6
          VMLAL.S16  Q10, D7, D7
          VMLAL.S16  Q10, D8, D8
          VMLAL.S16  Q10, D9, D9
          VMLAL.S16  Q10, D10, D10
          VMLAL.S16  Q10, D11, D11
          VMLAL.S16  Q10, D12, D12
          VMLAL.S16  Q10, D13, D13
          VMLAL.S16  Q10, D14, D14
          VMLAL.S16  Q10, D15, D15

          @ Horizontal reduction of the 4 partial sums, with saturating adds
          @ (QADD) to mirror the reference fixed-point arithmetic.
          VQADD.S32  D20, D20, D21
          VMOV.S32   r9,  D20[0]
          VMOV.S32   r10, D20[1]
          QADD       r6, r9, r10
          QADD       r6, r6, r6
          QADD       r9, r6, r7                 @ L_tmp = (L_tmp << 1) + 1;
          CLZ        r7, r9
          SUB        r6, r7, #1                 @ exp = norm_l(L_tmp)
          RSB        r7, r6, #32                @ exp = 32 - exp
          MOV        r6, r7, ASR #1
          RSB        r7, r6, #0                 @ scale = -(exp >> 1)

          @ --- Loop over every candidate pitch period.
          @ for (t = t_min; t <= t_max; t++)
          @ Live across the loop: r7 --- scale   r4 --- t (starts at t_min)
          @                       excf[] at r13+20   r0 exc[]  r1 xn[]  r2 h[]

LOOPFOR:
          ADD        r14, r13, #20              @ r14 = excf[] (fresh pointer each pass)
          MOV        r12, r1                    @ r12 = xn[]
          MOV        r8, #0x8000                @ rounding constant for vo_round later

          @ First 32 samples: accumulate excf^2 into Q10 and xn*excf into Q11.
          VLD1.S16   {Q0, Q1}, [r14]!           @ load 16 excf[]
          VLD1.S16   {Q2, Q3}, [r14]!           @ load 16 excf[]
          VLD1.S16   {Q4, Q5}, [r12]!           @ load 16 x[]
          VLD1.S16   {Q6, Q7}, [r12]!           @ load 16 x[]
          VMULL.S16  Q10, D0, D0               @ L_tmp1 += excf[] * excf[]
          VMULL.S16  Q11, D0, D8               @ L_tmp  += x[] * excf[]
          VMLAL.S16  Q10, D1, D1
          VMLAL.S16  Q11, D1, D9
          VMLAL.S16  Q10, D2, D2
          VMLAL.S16  Q11, D2, D10
          VMLAL.S16  Q10, D3, D3
          VMLAL.S16  Q11, D3, D11
          VMLAL.S16  Q10, D4, D4
          VMLAL.S16  Q11, D4, D12
          VMLAL.S16  Q10, D5, D5
          VMLAL.S16  Q11, D5, D13
          VMLAL.S16  Q10, D6, D6
          VMLAL.S16  Q11, D6, D14
          VMLAL.S16  Q10, D7, D7
          VMLAL.S16  Q11, D7, D15

          @ Second 32 samples.
          VLD1.S16   {Q0, Q1}, [r14]!           @ load 16 excf[]
          VLD1.S16   {Q2, Q3}, [r14]!           @ load 16 excf[]
          VLD1.S16   {Q4, Q5}, [r12]!           @ load 16 x[]
          VLD1.S16   {Q6, Q7}, [r12]!           @ load 16 x[]
          VMLAL.S16  Q10, D0, D0
          VMLAL.S16  Q11, D0, D8
          VMLAL.S16  Q10, D1, D1
          VMLAL.S16  Q11, D1, D9
          VMLAL.S16  Q10, D2, D2
          VMLAL.S16  Q11, D2, D10
          VMLAL.S16  Q10, D3, D3
          VMLAL.S16  Q11, D3, D11
          VMLAL.S16  Q10, D4, D4
          VMLAL.S16  Q11, D4, D12
          VMLAL.S16  Q10, D5, D5
          VMLAL.S16  Q11, D5, D13
          VMLAL.S16  Q10, D6, D6
          VMLAL.S16  Q11, D6, D14
          VMLAL.S16  Q10, D7, D7
          VMLAL.S16  Q11, D7, D15

          @ Horizontal reduction: after VQADD + VPADD both lanes of D20/D22
          @ hold the full sums (the "<< 1" of the reference code happens
          @ below with the LSL #1).
          VQADD.S32  D20, D20, D21
          VQADD.S32  D22, D22, D23

          VPADD.S32  D20, D20, D20              @ D20[0] --- sum(excf*excf)  (L_tmp1)
          VPADD.S32  D22, D22, D22              @ D22[0] --- sum(x*excf)     (L_tmp)

          VMOV.S32   r6, D20[0]
          VMOV.S32   r5, D22[0]

          @ r5 --- L_tmp (correlation), r6 --- L_tmp1 (energy)
          MOV        r10, #1
          ADD        r5, r10, r5, LSL #1        @ L_tmp  = (L_tmp << 1) + 1
          ADD        r6, r10, r6, LSL #1        @ L_tmp1 = (L_tmp1 << 1) + 1

          @ norm_l(L_tmp): the correlation may be negative, so count leading
          @ zeros of |L_tmp| when L_tmp < 0 (the conditional CLZ path).
          CLZ        r10, r5
          CMP        r5, #0
          RSBLT      r11, r5, #0
          CLZLT      r10, r11
          SUB        r10, r10, #1               @ exp = norm_l(L_tmp)

          MOV        r5, r5, LSL r10            @ L_tmp = (L_tmp << exp)
          RSB        r10, r10, #30              @ exp_corr = 30 - exp
          MOV        r11, r5, ASR #16           @ corr = extract_h(L_tmp)

          @ Energy L_tmp1 is always positive ((sum<<1)+1), so plain CLZ suffices.
          CLZ        r5, r6
          SUB        r5, r5, #1
          MOV        r6, r6, LSL r5             @ L_tmp = (L_tmp1 << exp)
          RSB        r5, r5, #30                @ exp_norm = 30 - exp

          @ r10 --- exp_corr, r11 --- corr
          @ r6  --- L_tmp,    r5  --- exp_norm

          @ Isqrt_n(&L_tmp, &exp_norm): C helper taking pointers, so stage the
          @ two values in the spill area on sp and pass their addresses.
          @ exp_norm is a Word16, hence the STRH/LDRSH pair.

          MOV        r14, r0
          MOV        r12, r1

          STMFD      sp!, {r0 - r4, r7 - r12, r14}
          ADD        r1, sp, #4
          ADD        r0, sp, #0
          STR        r6, [sp]
          STRH       r5, [sp, #4]
          BL         Isqrt_n
          LDR        r6, [sp]
          LDRSH      r5, [sp, #4]
          LDMFD      sp!, {r0 - r4, r7 - r12, r14}
          MOV        r0, r14
          MOV        r1, r12

          @ corr_norm[t] = round(corr * 1/sqrt(energy), rescaled to Q15).
          MOV        r6, r6, ASR #16            @ norm = extract_h(L_tmp)
          MUL        r12, r6, r11
          ADD        r12, r12, r12              @ L_tmp = vo_L_mult(corr, norm)

          ADD        r6, r10, r5
          ADD        r6, r6, r7                 @ exp_corr + exp_norm + scale

          @ L_shl with a possibly negative shift count: negative -> ASR,
          @ positive -> LSL, zero -> no shift (neither conditional fires).
          CMP        r6, #0
          RSBLT      r6, r6, #0
          MOVLT      r12, r12, ASR r6
          MOVGT      r12, r12, LSL r6           @ L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)

          ADD        r12, r12, r8               @ + 0x8000: round to nearest before taking high half
          MOV        r12, r12, ASR #16          @ vo_round(L_tmp)

          LDR        r5, [r13, #CORR_NORM]      @ get corr_norm address
          LDR        r6, [r13, #T_MAX]          @ get t_max
          ADD        r10, r5, r4, LSL #1        @ get corr_norm[t] address
          STRH       r12, [r10]                 @ corr_norm[t] = vo_round(L_tmp)

          CMP        r4, r6
          BEQ        Norm_corr_asm_end          @ last lag done -> exit

          @ --- Incremental update of excf[] for the next lag (avoids a full
          @ re-convolution): with tmp = exc[-(t+1)],
          @   excf[i] = excf[i-1] + (tmp * h[i]) >> 15   for i = 63 .. 1
          @   excf[0] = (tmp * h[0]) >> 15
          @ walked from the top down so each excf[i-1] is read before being
          @ overwritten.
          ADD        r4, r4, #1                 @ t_min ++  (t = next lag)
          RSB        r5, r4, #0                 @ k = -t

          MOV        r6, #63                    @ i = 63
          MOV        r8, r0                     @ exc[]
          MOV        r9, r2                     @ h[]
          ADD        r10, r13, #20              @ excf[]

          ADD        r8, r8, r5, LSL #1         @ exc[k] address
          ADD        r9, r9, r6, LSL #1         @ h[i] address
          ADD        r10, r10, r6, LSL #1       @ excf[i] address
          LDRSH      r11, [r8]                  @ tmp = exc[k]

LOOPK:
          LDRSH      r8, [r9], #-2              @ load h[i], step h pointer down
          LDRSH      r12, [r10, #-2]            @ load excf[i - 1]
          MUL        r14, r11, r8
          MOV        r8, r14, ASR #15           @ (tmp * h[i]) >> 15
          ADD        r14, r8, r12
          STRH       r14, [r10], #-2            @ excf[i] = excf[i-1] + product; step down
          SUBS       r6, r6, #1
          BGT        LOOPK                      @ 63 iterations: i = 63 .. 1

          @ Tail: excf[0] has no excf[-1] term.
          LDRSH      r8, [r9]                   @ load h[0]
          MUL        r14, r11, r8
          LDR        r6, [r13, #T_MAX]          @ get t_max
          MOV        r8, r14, ASR #15
          STRH       r8, [r10]                  @ excf[0] = (tmp * h[0]) >> 15

          CMP        r4, r6
          BLE        LOOPFOR                    @ next lag

Norm_corr_asm_end:

          ADD        r13, r13, #voSTACK         @ drop local frame
          LDMFD      r13!, {r4 - r12, r15}      @ restore regs, return (pc <- saved lr)

          .end