1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2008 Xiph.Org Foundation 3 Written by Jean-Marc Valin */ 4 /* 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions 7 are met: 8 9 - Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 - Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* This is a simple MDCT implementation that uses a N/4 complex FFT 30 to do most of the work. It should be relatively straightforward to 31 plug in pretty much and FFT here. 32 33 This replaces the Vorbis FFT (and uses the exact same API), which 34 was a bit too messy and that was ending up duplicating code 35 (might as well use the same FFT everywhere). 36 37 The algorithm is similar to (and inspired from) Fabrice Bellard's 38 MDCT implementation in FFMPEG, but has differences in signs, ordering 39 and scaling in many places. 40 */ 41 42 #ifndef SKIP_CONFIG_H 43 #ifdef HAVE_CONFIG_H 44 #include "config.h" 45 #endif 46 #endif 47 48 #include "mdct.h" 49 #include "kiss_fft.h" 50 #include "_kiss_fft_guts.h" 51 #include <math.h> 52 #include "os_support.h" 53 #include "mathops.h" 54 #include "stack_alloc.h" 55 56 #ifdef CUSTOM_MODES 57 58 int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 59 { 60 int i; 61 int N4; 62 kiss_twiddle_scalar *trig; 63 #if defined(FIXED_POINT) 64 int N2=N>>1; 65 #endif 66 l->n = N; 67 N4 = N>>2; 68 l->maxshift = maxshift; 69 for (i=0;i<=maxshift;i++) 70 { 71 if (i==0) 72 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); 73 else 74 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); 75 #ifndef ENABLE_TI_DSPLIB55 76 if (l->kfft[i]==NULL) 77 return 0; 78 #endif 79 } 80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 81 if (l->trig==NULL) 82 return 0; 83 /* We have enough points that sine isn't necessary */ 84 #if defined(FIXED_POINT) 85 for (i=0;i<=N4;i++) 86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N)); 87 #else 88 for (i=0;i<=N4;i++) 89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 90 #endif 91 return 1; 92 } 93 94 void clt_mdct_clear(mdct_lookup *l) 95 { 96 int i; 97 for (i=0;i<=l->maxshift;i++) 98 opus_fft_free(l->kfft[i]); 99 opus_free((kiss_twiddle_scalar*)l->trig); 100 } 101 102 #endif /* CUSTOM_MODES */ 103 104 /* Forward MDCT trashes the input array */ 105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 106 const opus_val16 *window, int overlap, int shift, int stride) 107 { 108 int i; 109 int N, N2, N4; 110 kiss_twiddle_scalar sine; 111 VARDECL(kiss_fft_scalar, f); 112 SAVE_STACK; 113 N = l->n; 114 N >>= shift; 115 N2 = N>>1; 116 N4 = N>>2; 117 ALLOC(f, N2, kiss_fft_scalar); 118 /* sin(x) ~= x here */ 119 #ifdef FIXED_POINT 120 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 121 #else 122 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 123 #endif 124 125 /* Consider the input to be composed of four blocks: [a, b, c, d] */ 126 /* Window, shuffle, fold */ 127 { 128 /* Temp pointers to make it really clear to the compiler what we're doing */ 129 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); 130 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); 131 kiss_fft_scalar * OPUS_RESTRICT yp = f; 132 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 133 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 134 for(i=0;i<(overlap>>2);i++) 135 { 136 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ 137 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); 138 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); 139 xp1+=2; 140 xp2-=2; 141 wp1+=2; 142 wp2-=2; 143 } 144 wp1 = window; 145 wp2 = window+overlap-1; 146 for(;i<N4-(overlap>>2);i++) 147 { 148 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 149 *yp++ = *xp2; 150 *yp++ = *xp1; 151 xp1+=2; 152 xp2-=2; 153 } 154 for(;i<N4;i++) 155 { 156 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ 157 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2); 158 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); 159 xp1+=2; 160 xp2-=2; 161 wp1+=2; 162 wp2-=2; 163 } 164 } 165 /* Pre-rotation */ 166 { 167 kiss_fft_scalar * OPUS_RESTRICT yp = f; 168 const kiss_twiddle_scalar *t = &l->trig[0]; 169 for(i=0;i<N4;i++) 170 { 171 kiss_fft_scalar re, im, yr, yi; 172 re = yp[0]; 173 im = yp[1]; 174 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 175 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 176 /* works because the cos is nearly one */ 177 *yp++ = yr + S_MUL(yi,sine); 178 *yp++ = yi - S_MUL(yr,sine); 179 } 180 } 181 182 /* N/4 complex FFT, down-scales by 4/N */ 183 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); 184 185 /* Post-rotate */ 186 { 187 /* Temp pointers to make it really clear to the compiler what we're doing */ 188 const kiss_fft_scalar * OPUS_RESTRICT fp = in; 189 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 190 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 191 const kiss_twiddle_scalar *t = &l->trig[0]; 192 /* Temp pointers to make it really clear to the compiler what we're doing */ 193 for(i=0;i<N4;i++) 194 { 195 kiss_fft_scalar yr, yi; 196 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 197 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 198 /* works because the cos is nearly one */ 199 *yp1 = yr - S_MUL(yi,sine); 200 *yp2 = yi + S_MUL(yr,sine);; 201 fp += 2; 202 yp1 += 2*stride; 203 yp2 -= 2*stride; 204 } 205 } 206 RESTORE_STACK; 207 } 208 209 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 210 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 211 { 212 int i; 213 int N, N2, N4; 214 kiss_twiddle_scalar sine; 215 VARDECL(kiss_fft_scalar, f); 216 VARDECL(kiss_fft_scalar, f2); 217 SAVE_STACK; 218 N = l->n; 219 N >>= shift; 220 N2 = N>>1; 221 N4 = N>>2; 222 ALLOC(f, N2, kiss_fft_scalar); 223 ALLOC(f2, N2, kiss_fft_scalar); 224 /* sin(x) ~= x here */ 225 #ifdef FIXED_POINT 226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; 227 #else 228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; 229 #endif 230 231 /* Pre-rotate */ 232 { 233 /* Temp pointers to make it really clear to the compiler what we're doing */ 234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; 237 const kiss_twiddle_scalar *t = &l->trig[0]; 238 for(i=0;i<N4;i++) 239 { 240 kiss_fft_scalar yr, yi; 241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 243 /* works because the cos is nearly one */ 244 *yp++ = yr - S_MUL(yi,sine); 245 *yp++ = yi + S_MUL(yr,sine); 246 xp1+=2*stride; 247 xp2-=2*stride; 248 } 249 } 250 251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); 253 254 /* Post-rotate */ 255 { 256 kiss_fft_scalar * OPUS_RESTRICT fp = f; 257 const kiss_twiddle_scalar *t = &l->trig[0]; 258 259 for(i=0;i<N4;i++) 260 { 261 kiss_fft_scalar re, im, yr, yi; 262 re = fp[0]; 263 im = fp[1]; 264 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 265 yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 266 yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 267 /* works because the cos is nearly one */ 268 *fp++ = yr - S_MUL(yi,sine); 269 *fp++ = yi + S_MUL(yr,sine); 270 } 271 } 272 /* De-shuffle the components for the middle of the window only */ 273 { 274 const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; 275 const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; 276 kiss_fft_scalar * OPUS_RESTRICT yp = f2; 277 for(i = 0; i < N4; i++) 278 { 279 *yp++ =-*fp1; 280 *yp++ = *fp2; 281 fp1 += 2; 282 fp2 -= 2; 283 } 284 } 285 out -= (N2-overlap)>>1; 286 /* Mirror on both sides for TDAC */ 287 { 288 kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; 289 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; 290 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; 291 const opus_val16 * OPUS_RESTRICT wp1 = window; 292 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 293 for(i = 0; i< N4-overlap/2; i++) 294 { 295 *xp1 = *fp1; 296 xp1--; 297 fp1--; 298 } 299 for(; i < N4; i++) 300 { 301 kiss_fft_scalar x1; 302 x1 = *fp1--; 303 *yp1++ +=-MULT16_32_Q15(*wp1, x1); 304 *xp1-- += MULT16_32_Q15(*wp2, x1); 305 wp1++; 306 wp2--; 307 } 308 } 309 { 310 kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; 311 kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; 312 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); 313 const opus_val16 * OPUS_RESTRICT wp1 = window; 314 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 315 for(i = 0; i< N4-overlap/2; i++) 316 { 317 *xp2 = *fp2; 318 xp2++; 319 fp2++; 320 } 321 for(; i < N4; i++) 322 { 323 kiss_fft_scalar x2; 324 x2 = *fp2++; 325 *yp2-- = MULT16_32_Q15(*wp1, x2); 326 *xp2++ = MULT16_32_Q15(*wp2, x2); 327 wp1++; 328 wp2--; 329 } 330 } 331 RESTORE_STACK; 332 } 333