1 /************************************************************************ 2 * Copyright (C) 2002-2009, Xiph.org Foundation 3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following disclaimer 14 * in the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the names of the Xiph.org Foundation nor Pinknoise 17 * Productions Ltd nor the names of its contributors may be used to 18 * endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 ************************************************************************ 33 34 function: normalized modified discrete cosine transform 35 power of two length transform only [64 <= n ] 36 last mod: $Id: mdct.c,v 1.9.6.5 2003/04/29 04:03:27 xiphmont Exp $ 37 38 Original algorithm adapted long ago from _The use of multirate filter 39 banks for coding of high quality digital audio_, by T. Sporer, 40 K. Brandenburg and B. Edler, collection of the European Signal 41 Processing Conference (EUSIPCO), Amsterdam, June 1992, Vol.1, pp 42 211-214 43 44 The below code implements an algorithm that no longer looks much like 45 that presented in the paper, but the basic structure remains if you 46 dig deep enough to see it. 47 48 This module DOES NOT INCLUDE code to generate/apply the window 49 function. Everybody has their own weird favorite including me... I 50 happen to like the properties of y=sin(.5PI*sin^2(x)), but others may 51 vehemently disagree. 52 53 ************************************************************************/ 54 55 #include "ivorbiscodec.h" 56 #include "os.h" 57 #include "misc.h" 58 #include "mdct.h" 59 #include "mdct_lookup.h" 60 61 #include <stdio.h> 62 63 #if defined(ONLY_C) 64 STIN void presymmetry(DATA_TYPE *in,int n2,int step){ 65 DATA_TYPE *aX; 66 DATA_TYPE *bX; 67 LOOKUP_T *T; 68 int n4=n2>>1; 69 70 aX = in+n2-3; 71 T = sincos_lookup0; 72 73 do{ 74 REG_TYPE s0= aX[0]; 75 REG_TYPE s2= aX[2]; 76 XPROD31( s0, s2, T[0], T[1], &aX[0], &aX[2] ); T+=step; 77 aX-=4; 78 }while(aX>=in+n4); 79 do{ 80 REG_TYPE s0= aX[0]; 81 REG_TYPE s2= aX[2]; 82 XPROD31( s0, s2, T[1], T[0], &aX[0], &aX[2] ); T-=step; 83 aX-=4; 84 }while(aX>=in); 85 86 aX = in+n2-4; 87 bX = in; 88 T = sincos_lookup0; 89 do{ 90 REG_TYPE ri0= aX[0]; 91 REG_TYPE ri2= aX[2]; 92 REG_TYPE ro0= bX[0]; 93 REG_TYPE ro2= bX[2]; 94 95 XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step; 96 XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] ); 97 98 aX-=4; 99 bX+=4; 100 }while(aX>=bX); 101 } 102 103 /* 8 point butterfly (in place) */ 104 STIN void mdct_butterfly_8(DATA_TYPE *x){ 105 106 REG_TYPE s0 = x[0] + x[1]; 107 REG_TYPE s1 = x[0] - x[1]; 108 REG_TYPE s2 = x[2] + x[3]; 109 REG_TYPE s3 = x[2] - x[3]; 110 REG_TYPE s4 = x[4] + x[5]; 111 REG_TYPE s5 = x[4] - x[5]; 112 REG_TYPE s6 = x[6] + x[7]; 113 REG_TYPE s7 = x[6] - x[7]; 114 115 x[0] = s5 + s3; 116 x[1] = s7 - s1; 117 x[2] = s5 - s3; 118 x[3] = s7 + s1; 119 x[4] = s4 - s0; 120 x[5] = s6 - s2; 121 x[6] = s4 + s0; 122 x[7] = s6 + s2; 123 MB(); 124 } 125 126 /* 16 point butterfly (in place, 4 register) */ 127 STIN void mdct_butterfly_16(DATA_TYPE *x){ 128 129 REG_TYPE s0, s1, s2, s3; 130 131 s0 = x[ 8] - x[ 9]; x[ 8] += x[ 9]; 132 s1 = x[10] - x[11]; x[10] += x[11]; 133 s2 = x[ 1] - x[ 0]; x[ 9] = x[ 1] + x[0]; 134 s3 = x[ 3] - x[ 2]; x[11] = x[ 3] + x[2]; 135 x[ 0] = MULT31((s0 - s1) , cPI2_8); 136 x[ 1] = MULT31((s2 + s3) , cPI2_8); 137 x[ 2] = MULT31((s0 + s1) , cPI2_8); 138 x[ 3] = MULT31((s3 - s2) , cPI2_8); 139 MB(); 140 141 s2 = x[12] - x[13]; x[12] += x[13]; 142 s3 = x[14] - x[15]; x[14] += x[15]; 143 s0 = x[ 4] - x[ 5]; x[13] = x[ 5] + x[ 4]; 144 s1 = x[ 7] - x[ 6]; x[15] = x[ 7] + x[ 6]; 145 x[ 4] = s2; x[ 5] = s1; 146 x[ 6] = s3; x[ 7] = s0; 147 MB(); 148 149 mdct_butterfly_8(x); 150 mdct_butterfly_8(x+8); 151 } 152 153 /* 32 point butterfly (in place, 4 register) */ 154 STIN void mdct_butterfly_32(DATA_TYPE *x){ 155 156 REG_TYPE s0, s1, s2, s3; 157 158 s0 = x[16] - x[17]; x[16] += x[17]; 159 s1 = x[18] - x[19]; x[18] += x[19]; 160 s2 = x[ 1] - x[ 0]; x[17] = x[ 1] + x[ 0]; 161 s3 = x[ 3] - x[ 2]; x[19] = x[ 3] + x[ 2]; 162 XNPROD31( s0, s1, cPI3_8, cPI1_8, &x[ 0], &x[ 2] ); 163 XPROD31 ( s2, s3, cPI1_8, cPI3_8, &x[ 1], &x[ 3] ); 164 MB(); 165 166 s0 = x[20] - x[21]; x[20] += x[21]; 167 s1 = x[22] - x[23]; x[22] += x[23]; 168 s2 = x[ 5] - x[ 4]; x[21] = x[ 5] + x[ 4]; 169 s3 = x[ 7] - x[ 6]; x[23] = x[ 7] + x[ 6]; 170 x[ 4] = MULT31((s0 - s1) , cPI2_8); 171 x[ 5] = MULT31((s3 + s2) , cPI2_8); 172 x[ 6] = MULT31((s0 + s1) , cPI2_8); 173 x[ 7] = MULT31((s3 - s2) , cPI2_8); 174 MB(); 175 176 s0 = x[24] - x[25]; x[24] += x[25]; 177 s1 = x[26] - x[27]; x[26] += x[27]; 178 s2 = x[ 9] - x[ 8]; x[25] = x[ 9] + x[ 8]; 179 s3 = x[11] - x[10]; x[27] = x[11] + x[10]; 180 XNPROD31( s0, s1, cPI1_8, cPI3_8, &x[ 8], &x[10] ); 181 XPROD31 ( s2, s3, cPI3_8, cPI1_8, &x[ 9], &x[11] ); 182 MB(); 183 184 s0 = x[28] - x[29]; x[28] += x[29]; 185 s1 = x[30] - x[31]; x[30] += x[31]; 186 s2 = x[12] - x[13]; x[29] = x[13] + x[12]; 187 s3 = x[15] - x[14]; x[31] = x[15] + x[14]; 188 x[12] = s0; x[13] = s3; 189 x[14] = s1; x[15] = s2; 190 MB(); 191 192 mdct_butterfly_16(x); 193 mdct_butterfly_16(x+16); 194 } 195 196 /* N/stage point generic N stage butterfly (in place, 2 register) */ 197 STIN void mdct_butterfly_generic(DATA_TYPE *x,int points,int step){ 198 LOOKUP_T *T = sincos_lookup0; 199 DATA_TYPE *x1 = x + points - 4; 200 DATA_TYPE *x2 = x + (points>>1) - 4; 201 REG_TYPE s0, s1, s2, s3; 202 203 do{ 204 s0 = x1[0] - x1[1]; x1[0] += x1[1]; 205 s1 = x1[3] - x1[2]; x1[2] += x1[3]; 206 s2 = x2[1] - x2[0]; x1[1] = x2[1] + x2[0]; 207 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2]; 208 XPROD31( s1, s0, T[0], T[1], &x2[0], &x2[2] ); 209 XPROD31( s2, s3, T[0], T[1], &x2[1], &x2[3] ); T+=step; 210 x1-=4; 211 x2-=4; 212 }while(T<sincos_lookup0+1024); 213 x1 = x + (points>>1) + (points>>2) - 4; 214 x2 = x + (points>>2) - 4; 215 T = sincos_lookup0+1024; 216 do{ 217 s0 = x1[0] - x1[1]; x1[0] += x1[1]; 218 s1 = x1[2] - x1[3]; x1[2] += x1[3]; 219 s2 = x2[0] - x2[1]; x1[1] = x2[1] + x2[0]; 220 s3 = x2[3] - x2[2]; x1[3] = x2[3] + x2[2]; 221 XNPROD31( s0, s1, T[0], T[1], &x2[0], &x2[2] ); 222 XNPROD31( s3, s2, T[0], T[1], &x2[1], &x2[3] ); T-=step; 223 x1-=4; 224 x2-=4; 225 }while(T>sincos_lookup0); 226 } 227 228 STIN void mdct_butterflies(DATA_TYPE *x,int points,int shift){ 229 230 int stages=7-shift; 231 int i,j; 232 233 for(i=0;--stages>=0;i++){ 234 for(j=0;j<(1<<i);j++) 235 { 236 mdct_butterfly_generic(x+(points>>i)*j,points>>i,4<<(i+shift)); 237 } 238 } 239 240 for(j=0;j<points;j+=32) 241 mdct_butterfly_32(x+j); 242 } 243 244 static unsigned char bitrev[16]={0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15}; 245 246 STIN int bitrev12(int x){ 247 return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); 248 } 249 250 STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){ 251 int bit = 0; 252 DATA_TYPE *w = x+(n>>1); 253 254 do{ 255 DATA_TYPE b = bitrev12(bit++); 256 DATA_TYPE *xx = x + (b>>shift); 257 REG_TYPE r; 258 259 w -= 2; 260 261 if(w>xx){ 262 263 r = xx[0]; 264 xx[0] = w[0]; 265 w[0] = r; 266 267 r = xx[1]; 268 xx[1] = w[1]; 269 w[1] = r; 270 } 271 }while(w>x); 272 } 273 274 STIN void mdct_step7(DATA_TYPE *x,int n,int step){ 275 DATA_TYPE *w0 = x; 276 DATA_TYPE *w1 = x+(n>>1); 277 LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; 278 LOOKUP_T *Ttop = T+1024; 279 REG_TYPE s0, s1, s2, s3; 280 281 do{ 282 w1 -= 2; 283 284 s0 = w0[0] + w1[0]; 285 s1 = w1[1] - w0[1]; 286 s2 = MULT32(s0, T[1]) + MULT32(s1, T[0]); 287 s3 = MULT32(s1, T[1]) - MULT32(s0, T[0]); 288 T+=step; 289 290 s0 = (w0[1] + w1[1])>>1; 291 s1 = (w0[0] - w1[0])>>1; 292 w0[0] = s0 + s2; 293 w0[1] = s1 + s3; 294 w1[0] = s0 - s2; 295 w1[1] = s3 - s1; 296 297 w0 += 2; 298 }while(T<Ttop); 299 do{ 300 w1 -= 2; 301 302 s0 = w0[0] + w1[0]; 303 s1 = w1[1] - w0[1]; 304 T-=step; 305 s2 = MULT32(s0, T[0]) + MULT32(s1, T[1]); 306 s3 = MULT32(s1, T[0]) - MULT32(s0, T[1]); 307 308 s0 = (w0[1] + w1[1])>>1; 309 s1 = (w0[0] - w1[0])>>1; 310 w0[0] = s0 + s2; 311 w0[1] = s1 + s3; 312 w1[0] = s0 - s2; 313 w1[1] = s3 - s1; 314 315 w0 += 2; 316 }while(w0<w1); 317 } 318 #endif 319 320 STIN void mdct_step8(DATA_TYPE *x, int n, int step){ 321 LOOKUP_T *T; 322 LOOKUP_T *V; 323 DATA_TYPE *iX =x+(n>>1); 324 325 switch(step) { 326 #if defined(ONLY_C) 327 default: 328 T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; 329 do{ 330 REG_TYPE s0 = x[0]; 331 REG_TYPE s1 = -x[1]; 332 XPROD31( s0, s1, T[0], T[1], x, x+1); T+=step; 333 x +=2; 334 }while(x<iX); 335 break; 336 #endif 337 338 case 1: 339 { 340 /* linear interpolation between table values: offset=0.5, step=1 */ 341 REG_TYPE t0,t1,v0,v1,s0,s1; 342 T = sincos_lookup0; 343 V = sincos_lookup1; 344 t0 = (*T++)>>1; 345 t1 = (*T++)>>1; 346 do{ 347 s0 = x[0]; 348 s1 = -x[1]; 349 t0 += (v0 = (*V++)>>1); 350 t1 += (v1 = (*V++)>>1); 351 XPROD31( s0, s1, t0, t1, x, x+1 ); 352 353 s0 = x[2]; 354 s1 = -x[3]; 355 v0 += (t0 = (*T++)>>1); 356 v1 += (t1 = (*T++)>>1); 357 XPROD31( s0, s1, v0, v1, x+2, x+3 ); 358 359 x += 4; 360 }while(x<iX); 361 break; 362 } 363 364 case 0: 365 { 366 /* linear interpolation between table values: offset=0.25, step=0.5 */ 367 REG_TYPE t0,t1,v0,v1,q0,q1,s0,s1; 368 T = sincos_lookup0; 369 V = sincos_lookup1; 370 t0 = *T++; 371 t1 = *T++; 372 do{ 373 374 375 v0 = *V++; 376 v1 = *V++; 377 t0 += (q0 = (v0-t0)>>2); 378 t1 += (q1 = (v1-t1)>>2); 379 s0 = x[0]; 380 s1 = -x[1]; 381 XPROD31( s0, s1, t0, t1, x, x+1 ); 382 t0 = v0-q0; 383 t1 = v1-q1; 384 s0 = x[2]; 385 s1 = -x[3]; 386 XPROD31( s0, s1, t0, t1, x+2, x+3 ); 387 388 t0 = *T++; 389 t1 = *T++; 390 v0 += (q0 = (t0-v0)>>2); 391 v1 += (q1 = (t1-v1)>>2); 392 s0 = x[4]; 393 s1 = -x[5]; 394 XPROD31( s0, s1, v0, v1, x+4, x+5 ); 395 v0 = t0-q0; 396 v1 = t1-q1; 397 s0 = x[6]; 398 s1 = -x[7]; 399 XPROD31( s0, s1, v0, v1, x+5, x+6 ); 400 401 x+=8; 402 }while(x<iX); 403 break; 404 } 405 } 406 } 407 408 extern int mdct_backwardARM(int n, DATA_TYPE *in); 409 410 /* partial; doesn't perform last-step deinterleave/unrolling. That 411 can be done more efficiently during pcm output */ 412 void mdct_backward(int n, DATA_TYPE *in){ 413 int step; 414 415 #if defined(ONLY_C) 416 int shift; 417 418 for (shift=4;!(n&(1<<shift));shift++); 419 shift=13-shift; 420 step=2<<shift; 421 422 presymmetry(in,n>>1,step); 423 mdct_butterflies(in,n>>1,shift); 424 mdct_bitreverse(in,n,shift); 425 mdct_step7(in,n,step); 426 mdct_step8(in,n,step>>2); 427 #else 428 step = mdct_backwardARM(n, in); 429 if (step <= 1) 430 mdct_step8(in,n,step); 431 #endif 432 } 433 434 #if defined(ONLY_C) 435 void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){ 436 int i; 437 n>>=2; 438 in+=1; 439 440 for(i=0;i<n;i++) 441 right[i]=in[i<<1]; 442 } 443 #endif 444 445 extern ogg_int16_t *mdct_unroll_prelap(ogg_int16_t *out, 446 DATA_TYPE *post, 447 DATA_TYPE *l, 448 int step); 449 extern ogg_int16_t *mdct_unroll_part2(ogg_int16_t *out, 450 DATA_TYPE *post, 451 DATA_TYPE *l, 452 DATA_TYPE *r, 453 int step, 454 LOOKUP_T *wL, 455 LOOKUP_T *wR); 456 extern ogg_int16_t *mdct_unroll_part3(ogg_int16_t *out, 457 DATA_TYPE *post, 458 DATA_TYPE *l, 459 DATA_TYPE *r, 460 int step, 461 LOOKUP_T *wL, 462 LOOKUP_T *wR); 463 extern ogg_int16_t *mdct_unroll_postlap(ogg_int16_t *out, 464 DATA_TYPE *post, 465 DATA_TYPE *l, 466 int step); 467 468 void mdct_unroll_lap(int n0,int n1, 469 int lW,int W, 470 DATA_TYPE *in, 471 DATA_TYPE *right, 472 LOOKUP_T *w0, 473 LOOKUP_T *w1, 474 ogg_int16_t *out, 475 int step, 476 int start, /* samples, this frame */ 477 int end /* samples, this frame */){ 478 479 DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1); 480 DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2); 481 DATA_TYPE *post; 482 LOOKUP_T *wR=(W && lW ? w1+(n1>>1) : w0+(n0>>1)); 483 LOOKUP_T *wL=(W && lW ? w1 : w0 ); 484 485 int preLap=(lW && !W ? (n1>>2)-(n0>>2) : 0 ); 486 int halfLap=(lW && W ? (n1>>2) : (n0>>2) ); 487 int postLap=(!lW && W ? (n1>>2)-(n0>>2) : 0 ); 488 int n,off; 489 490 /* preceeding direct-copy lapping from previous frame, if any */ 491 if(preLap){ 492 n = (end<preLap?end:preLap); 493 off = (start<preLap?start:preLap); 494 post = r-n; 495 r -= off; 496 start -= off; 497 end -= n; 498 #if defined(ONLY_C) 499 while(r>post){ 500 *out = CLIP_TO_15((*--r)>>9); 501 out+=step; 502 } 503 #else 504 out = mdct_unroll_prelap(out,post,r,step); 505 n -= off; 506 if (n < 0) 507 n = 0; 508 r -= n; 509 #endif 510 } 511 512 /* cross-lap; two halves due to wrap-around */ 513 n = (end<halfLap?end:halfLap); 514 off = (start<halfLap?start:halfLap); 515 post = r-n; 516 r -= off; 517 l -= off*2; 518 start -= off; 519 wR -= off; 520 wL += off; 521 end -= n; 522 #if defined(ONLY_C) 523 while(r>post){ 524 l-=2; 525 *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9); 526 out+=step; 527 } 528 #else 529 out = mdct_unroll_part2(out, post, l, r, step, wL, wR); 530 n -= off; 531 if (n < 0) 532 n = 0; 533 l -= 2*n; 534 r -= n; 535 wR -= n; 536 wL += n; 537 #endif 538 539 n = (end<halfLap?end:halfLap); 540 off = (start<halfLap?start:halfLap); 541 post = r+n; 542 r += off; 543 l += off*2; 544 start -= off; 545 end -= n; 546 wR -= off; 547 wL += off; 548 #if defined(ONLY_C) 549 while(r<post){ 550 *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9); 551 out+=step; 552 l+=2; 553 } 554 #else 555 out = mdct_unroll_part3(out, post, l, r, step, wL, wR); 556 n -= off; 557 if (n < 0) 558 n = 0; 559 l += 2*n; 560 r += n; 561 wR -= n; 562 wL += n; 563 #endif 564 565 /* preceeding direct-copy lapping from previous frame, if any */ 566 if(postLap){ 567 n = (end<postLap?end:postLap); 568 off = (start<postLap?start:postLap); 569 post = l+n*2; 570 l += off*2; 571 #if defined(ONLY_C) 572 while(l<post){ 573 *out = CLIP_TO_15((-*l)>>9); 574 out+=step; 575 l+=2; 576 } 577 #else 578 out = mdct_unroll_postlap(out,post,l,step); 579 #endif 580 } 581 } 582 583