1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2009 Xiph.Org Foundation 3 Written by Jean-Marc Valin */ 4 /* 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions 7 are met: 8 9 - Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 - Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #ifdef HAVE_CONFIG_H 30 #include "config.h" 31 #endif 32 33 #include "mathops.h" 34 #include "cwrs.h" 35 #include "vq.h" 36 #include "arch.h" 37 #include "os_support.h" 38 #include "bands.h" 39 #include "rate.h" 40 41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 42 { 43 int i; 44 celt_norm *Xptr; 45 Xptr = X; 46 for (i=0;i<len-stride;i++) 47 { 48 celt_norm x1, x2; 49 x1 = Xptr[0]; 50 x2 = Xptr[stride]; 51 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 52 *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 53 } 54 Xptr = &X[len-2*stride-1]; 55 for (i=len-2*stride-1;i>=0;i--) 56 { 57 celt_norm x1, x2; 58 x1 = Xptr[0]; 59 x2 = Xptr[stride]; 60 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); 61 *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); 62 } 63 } 64 65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 66 { 67 static const int SPREAD_FACTOR[3]={15,10,5}; 68 int i; 69 opus_val16 c, s; 70 opus_val16 gain, theta; 71 int stride2=0; 72 int factor; 73 74 if (2*K>=len || spread==SPREAD_NONE) 75 return; 76 factor = SPREAD_FACTOR[spread-1]; 77 78 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); 79 theta = HALF16(MULT16_16_Q15(gain,gain)); 80 81 c = celt_cos_norm(EXTEND32(theta)); 82 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ 83 84 if (len>=8*stride) 85 { 86 stride2 = 1; 87 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. 88 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ 89 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) 90 stride2++; 91 } 92 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 93 extract_collapse_mask().*/ 94 len /= stride; 95 for (i=0;i<stride;i++) 96 { 97 if (dir < 0) 98 { 99 if (stride2) 100 exp_rotation1(X+i*len, len, stride2, s, c); 101 exp_rotation1(X+i*len, len, 1, c, s); 102 } else { 103 exp_rotation1(X+i*len, len, 1, c, -s); 104 if (stride2) 105 exp_rotation1(X+i*len, len, stride2, s, -c); 106 } 107 } 108 } 109 110 /** Takes the pitch vector and the decoded residual vector, computes the gain 111 that will give ||p+g*y||=1 and mixes the residual with the pitch. */ 112 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, 113 int N, opus_val32 Ryy, opus_val16 gain) 114 { 115 int i; 116 #ifdef FIXED_POINT 117 int k; 118 #endif 119 opus_val32 t; 120 opus_val16 g; 121 122 #ifdef FIXED_POINT 123 k = celt_ilog2(Ryy)>>1; 124 #endif 125 t = VSHR32(Ryy, 2*(k-7)); 126 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); 127 128 i=0; 129 do 130 X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); 131 while (++i < N); 132 } 133 134 static unsigned extract_collapse_mask(int *iy, int N, int B) 135 { 136 unsigned collapse_mask; 137 int N0; 138 int i; 139 if (B<=1) 140 return 1; 141 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 142 exp_rotation().*/ 143 N0 = N/B; 144 collapse_mask = 0; 145 i=0; do { 146 int j; 147 j=0; do { 148 collapse_mask |= (iy[i*N0+j]!=0)<<i; 149 } while (++j<N0); 150 } while (++i<B); 151 return collapse_mask; 152 } 153 154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc 155 #ifdef RESYNTH 156 , opus_val16 gain 157 #endif 158 ) 159 { 160 VARDECL(celt_norm, y); 161 VARDECL(int, iy); 162 VARDECL(opus_val16, signx); 163 int i, j; 164 opus_val16 s; 165 int pulsesLeft; 166 opus_val32 sum; 167 opus_val32 xy; 168 opus_val16 yy; 169 unsigned collapse_mask; 170 SAVE_STACK; 171 172 celt_assert2(K>0, "alg_quant() needs at least one pulse"); 173 celt_assert2(N>1, "alg_quant() needs at least two dimensions"); 174 175 ALLOC(y, N, celt_norm); 176 ALLOC(iy, N, int); 177 ALLOC(signx, N, opus_val16); 178 179 exp_rotation(X, N, 1, B, K, spread); 180 181 /* Get rid of the sign */ 182 sum = 0; 183 j=0; do { 184 if (X[j]>0) 185 signx[j]=1; 186 else { 187 signx[j]=-1; 188 X[j]=-X[j]; 189 } 190 iy[j] = 0; 191 y[j] = 0; 192 } while (++j<N); 193 194 xy = yy = 0; 195 196 pulsesLeft = K; 197 198 /* Do a pre-search by projecting on the pyramid */ 199 if (K > (N>>1)) 200 { 201 opus_val16 rcp; 202 j=0; do { 203 sum += X[j]; 204 } while (++j<N); 205 206 /* If X is too small, just replace it with a pulse at 0 */ 207 #ifdef FIXED_POINT 208 if (sum <= K) 209 #else 210 /* Prevents infinities and NaNs from causing too many pulses 211 to be allocated. 64 is an approximation of infinity here. */ 212 if (!(sum > EPSILON && sum < 64)) 213 #endif 214 { 215 X[0] = QCONST16(1.f,14); 216 j=1; do 217 X[j]=0; 218 while (++j<N); 219 sum = QCONST16(1.f,14); 220 } 221 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); 222 j=0; do { 223 #ifdef FIXED_POINT 224 /* It's really important to round *towards zero* here */ 225 iy[j] = MULT16_16_Q15(X[j],rcp); 226 #else 227 iy[j] = (int)floor(rcp*X[j]); 228 #endif 229 y[j] = (celt_norm)iy[j]; 230 yy = MAC16_16(yy, y[j],y[j]); 231 xy = MAC16_16(xy, X[j],y[j]); 232 y[j] *= 2; 233 pulsesLeft -= iy[j]; 234 } while (++j<N); 235 } 236 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); 237 238 /* This should never happen, but just in case it does (e.g. on silence) 239 we fill the first bin with pulses. */ 240 #ifdef FIXED_POINT_DEBUG 241 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); 242 #endif 243 if (pulsesLeft > N+3) 244 { 245 opus_val16 tmp = (opus_val16)pulsesLeft; 246 yy = MAC16_16(yy, tmp, tmp); 247 yy = MAC16_16(yy, tmp, y[0]); 248 iy[0] += pulsesLeft; 249 pulsesLeft=0; 250 } 251 252 s = 1; 253 for (i=0;i<pulsesLeft;i++) 254 { 255 int best_id; 256 opus_val32 best_num = -VERY_LARGE16; 257 opus_val16 best_den = 0; 258 #ifdef FIXED_POINT 259 int rshift; 260 #endif 261 #ifdef FIXED_POINT 262 rshift = 1+celt_ilog2(K-pulsesLeft+i+1); 263 #endif 264 best_id = 0; 265 /* The squared magnitude term gets added anyway, so we might as well 266 add it outside the loop */ 267 yy = ADD32(yy, 1); 268 j=0; 269 do { 270 opus_val16 Rxy, Ryy; 271 /* Temporary sums of the new pulse(s) */ 272 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); 273 /* We're multiplying y[j] by two so we don't have to do it here */ 274 Ryy = ADD16(yy, y[j]); 275 276 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that 277 Rxy is positive because the sign is pre-computed) */ 278 Rxy = MULT16_16_Q15(Rxy,Rxy); 279 /* The idea is to check for num/den >= best_num/best_den, but that way 280 we can do it without any division */ 281 /* OPT: Make sure to use conditional moves here */ 282 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) 283 { 284 best_den = Ryy; 285 best_num = Rxy; 286 best_id = j; 287 } 288 } while (++j<N); 289 290 /* Updating the sums of the new pulse(s) */ 291 xy = ADD32(xy, EXTEND32(X[best_id])); 292 /* We're multiplying y[j] by two so we don't have to do it here */ 293 yy = ADD16(yy, y[best_id]); 294 295 /* Only now that we've made the final choice, update y/iy */ 296 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ 297 y[best_id] += 2*s; 298 iy[best_id]++; 299 } 300 301 /* Put the original sign back */ 302 j=0; 303 do { 304 X[j] = MULT16_16(signx[j],X[j]); 305 if (signx[j] < 0) 306 iy[j] = -iy[j]; 307 } while (++j<N); 308 encode_pulses(iy, N, K, enc); 309 310 #ifdef RESYNTH 311 normalise_residual(iy, X, N, yy, gain); 312 exp_rotation(X, N, -1, B, K, spread); 313 #endif 314 315 collapse_mask = extract_collapse_mask(iy, N, B); 316 RESTORE_STACK; 317 return collapse_mask; 318 } 319 320 /** Decode pulse vector and combine the result with the pitch vector to produce 321 the final normalised signal in the current band. */ 322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 323 ec_dec *dec, opus_val16 gain) 324 { 325 int i; 326 opus_val32 Ryy; 327 unsigned collapse_mask; 328 VARDECL(int, iy); 329 SAVE_STACK; 330 331 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); 332 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); 333 ALLOC(iy, N, int); 334 decode_pulses(iy, N, K, dec); 335 Ryy = 0; 336 i=0; 337 do { 338 Ryy = MAC16_16(Ryy, iy[i], iy[i]); 339 } while (++i < N); 340 normalise_residual(iy, X, N, Ryy, gain); 341 exp_rotation(X, N, -1, B, K, spread); 342 collapse_mask = extract_collapse_mask(iy, N, B); 343 RESTORE_STACK; 344 return collapse_mask; 345 } 346 347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain) 348 { 349 int i; 350 #ifdef FIXED_POINT 351 int k; 352 #endif 353 opus_val32 E = EPSILON; 354 opus_val16 g; 355 opus_val32 t; 356 celt_norm *xptr = X; 357 for (i=0;i<N;i++) 358 { 359 E = MAC16_16(E, *xptr, *xptr); 360 xptr++; 361 } 362 #ifdef FIXED_POINT 363 k = celt_ilog2(E)>>1; 364 #endif 365 t = VSHR32(E, 2*(k-7)); 366 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); 367 368 xptr = X; 369 for (i=0;i<N;i++) 370 { 371 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); 372 xptr++; 373 } 374 /*return celt_sqrt(E);*/ 375 } 376 377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) 378 { 379 int i; 380 int itheta; 381 opus_val16 mid, side; 382 opus_val32 Emid, Eside; 383 384 Emid = Eside = EPSILON; 385 if (stereo) 386 { 387 for (i=0;i<N;i++) 388 { 389 celt_norm m, s; 390 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); 391 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); 392 Emid = MAC16_16(Emid, m, m); 393 Eside = MAC16_16(Eside, s, s); 394 } 395 } else { 396 for (i=0;i<N;i++) 397 { 398 celt_norm m, s; 399 m = X[i]; 400 s = Y[i]; 401 Emid = MAC16_16(Emid, m, m); 402 Eside = MAC16_16(Eside, s, s); 403 } 404 } 405 mid = celt_sqrt(Emid); 406 side = celt_sqrt(Eside); 407 #ifdef FIXED_POINT 408 /* 0.63662 = 2/pi */ 409 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); 410 #else 411 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); 412 #endif 413 414 return itheta; 415 } 416