1 /* Copyright (c) 2007-2008 CSIRO 2 Copyright (c) 2007-2009 Xiph.Org Foundation 3 Written by Jean-Marc Valin */ 4 /* 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions 7 are met: 8 9 - Redistributions of source code must retain the above copyright 10 notice, this list of conditions and the following disclaimer. 11 12 - Redistributions in binary form must reproduce the above copyright 13 notice, this list of conditions and the following disclaimer in the 14 documentation and/or other materials provided with the distribution. 15 16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 20 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 21 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 22 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #ifdef HAVE_CONFIG_H 30 #include "config.h" 31 #endif 32 33 #include "mathops.h" 34 #include "cwrs.h" 35 #include "vq.h" 36 #include "arch.h" 37 #include "os_support.h" 38 #include "bands.h" 39 #include "rate.h" 40 #include "pitch.h" 41 42 #ifndef OVERRIDE_vq_exp_rotation1 43 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) 44 { 45 int i; 46 opus_val16 ms; 47 celt_norm *Xptr; 48 Xptr = X; 49 ms = NEG16(s); 50 for (i=0;i<len-stride;i++) 51 { 52 celt_norm x1, x2; 53 x1 = Xptr[0]; 54 x2 = Xptr[stride]; 55 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 56 *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 57 } 58 Xptr = &X[len-2*stride-1]; 59 for (i=len-2*stride-1;i>=0;i--) 60 { 61 celt_norm x1, x2; 62 x1 = Xptr[0]; 63 x2 = Xptr[stride]; 64 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 65 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 66 } 67 } 68 #endif /* OVERRIDE_vq_exp_rotation1 */ 69 70 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 71 { 72 static const int SPREAD_FACTOR[3]={15,10,5}; 73 int i; 74 opus_val16 c, s; 75 opus_val16 gain, theta; 76 int stride2=0; 77 int factor; 78 79 if (2*K>=len || spread==SPREAD_NONE) 80 return; 81 factor = SPREAD_FACTOR[spread-1]; 82 83 gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K)); 84 theta = HALF16(MULT16_16_Q15(gain,gain)); 85 86 c = celt_cos_norm(EXTEND32(theta)); 87 s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /* sin(theta) */ 88 89 if (len>=8*stride) 90 { 91 stride2 = 1; 92 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. 93 It's basically incrementing long as (stride2+0.5)^2 < len/stride. */ 94 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) 95 stride2++; 96 } 97 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 98 extract_collapse_mask().*/ 99 len = celt_udiv(len, stride); 100 for (i=0;i<stride;i++) 101 { 102 if (dir < 0) 103 { 104 if (stride2) 105 exp_rotation1(X+i*len, len, stride2, s, c); 106 exp_rotation1(X+i*len, len, 1, c, s); 107 } else { 108 exp_rotation1(X+i*len, len, 1, c, -s); 109 if (stride2) 110 exp_rotation1(X+i*len, len, stride2, s, -c); 111 } 112 } 113 } 114 115 /** Takes the pitch vector and the decoded residual vector, computes the gain 116 that will give ||p+g*y||=1 and mixes the residual with the pitch. */ 117 static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X, 118 int N, opus_val32 Ryy, opus_val16 gain) 119 { 120 int i; 121 #ifdef FIXED_POINT 122 int k; 123 #endif 124 opus_val32 t; 125 opus_val16 g; 126 127 #ifdef FIXED_POINT 128 k = celt_ilog2(Ryy)>>1; 129 #endif 130 t = VSHR32(Ryy, 2*(k-7)); 131 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); 132 133 i=0; 134 do 135 X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1)); 136 while (++i < N); 137 } 138 139 static unsigned extract_collapse_mask(int *iy, int N, int B) 140 { 141 unsigned collapse_mask; 142 int N0; 143 int i; 144 if (B<=1) 145 return 1; 146 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for 147 exp_rotation().*/ 148 N0 = celt_udiv(N, B); 149 collapse_mask = 0; 150 i=0; do { 151 int j; 152 unsigned tmp=0; 153 j=0; do { 154 tmp |= iy[i*N0+j]; 155 } while (++j<N0); 156 collapse_mask |= (tmp!=0)<<i; 157 } while (++i<B); 158 return collapse_mask; 159 } 160 161 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc 162 #ifdef RESYNTH 163 , opus_val16 gain 164 #endif 165 ) 166 { 167 VARDECL(celt_norm, y); 168 VARDECL(int, iy); 169 VARDECL(opus_val16, signx); 170 int i, j; 171 opus_val16 s; 172 int pulsesLeft; 173 opus_val32 sum; 174 opus_val32 xy; 175 opus_val16 yy; 176 unsigned collapse_mask; 177 SAVE_STACK; 178 179 celt_assert2(K>0, "alg_quant() needs at least one pulse"); 180 celt_assert2(N>1, "alg_quant() needs at least two dimensions"); 181 182 ALLOC(y, N, celt_norm); 183 ALLOC(iy, N, int); 184 ALLOC(signx, N, opus_val16); 185 186 exp_rotation(X, N, 1, B, K, spread); 187 188 /* Get rid of the sign */ 189 sum = 0; 190 j=0; do { 191 if (X[j]>0) 192 signx[j]=1; 193 else { 194 signx[j]=-1; 195 X[j]=-X[j]; 196 } 197 iy[j] = 0; 198 y[j] = 0; 199 } while (++j<N); 200 201 xy = yy = 0; 202 203 pulsesLeft = K; 204 205 /* Do a pre-search by projecting on the pyramid */ 206 if (K > (N>>1)) 207 { 208 opus_val16 rcp; 209 j=0; do { 210 sum += X[j]; 211 } while (++j<N); 212 213 /* If X is too small, just replace it with a pulse at 0 */ 214 #ifdef FIXED_POINT 215 if (sum <= K) 216 #else 217 /* Prevents infinities and NaNs from causing too many pulses 218 to be allocated. 64 is an approximation of infinity here. */ 219 if (!(sum > EPSILON && sum < 64)) 220 #endif 221 { 222 X[0] = QCONST16(1.f,14); 223 j=1; do 224 X[j]=0; 225 while (++j<N); 226 sum = QCONST16(1.f,14); 227 } 228 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); 229 j=0; do { 230 #ifdef FIXED_POINT 231 /* It's really important to round *towards zero* here */ 232 iy[j] = MULT16_16_Q15(X[j],rcp); 233 #else 234 iy[j] = (int)floor(rcp*X[j]); 235 #endif 236 y[j] = (celt_norm)iy[j]; 237 yy = MAC16_16(yy, y[j],y[j]); 238 xy = MAC16_16(xy, X[j],y[j]); 239 y[j] *= 2; 240 pulsesLeft -= iy[j]; 241 } while (++j<N); 242 } 243 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); 244 245 /* This should never happen, but just in case it does (e.g. on silence) 246 we fill the first bin with pulses. */ 247 #ifdef FIXED_POINT_DEBUG 248 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); 249 #endif 250 if (pulsesLeft > N+3) 251 { 252 opus_val16 tmp = (opus_val16)pulsesLeft; 253 yy = MAC16_16(yy, tmp, tmp); 254 yy = MAC16_16(yy, tmp, y[0]); 255 iy[0] += pulsesLeft; 256 pulsesLeft=0; 257 } 258 259 s = 1; 260 for (i=0;i<pulsesLeft;i++) 261 { 262 int best_id; 263 opus_val32 best_num = -VERY_LARGE16; 264 opus_val16 best_den = 0; 265 #ifdef FIXED_POINT 266 int rshift; 267 #endif 268 #ifdef FIXED_POINT 269 rshift = 1+celt_ilog2(K-pulsesLeft+i+1); 270 #endif 271 best_id = 0; 272 /* The squared magnitude term gets added anyway, so we might as well 273 add it outside the loop */ 274 yy = ADD16(yy, 1); 275 j=0; 276 do { 277 opus_val16 Rxy, Ryy; 278 /* Temporary sums of the new pulse(s) */ 279 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); 280 /* We're multiplying y[j] by two so we don't have to do it here */ 281 Ryy = ADD16(yy, y[j]); 282 283 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that 284 Rxy is positive because the sign is pre-computed) */ 285 Rxy = MULT16_16_Q15(Rxy,Rxy); 286 /* The idea is to check for num/den >= best_num/best_den, but that way 287 we can do it without any division */ 288 /* OPT: Make sure to use conditional moves here */ 289 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) 290 { 291 best_den = Ryy; 292 best_num = Rxy; 293 best_id = j; 294 } 295 } while (++j<N); 296 297 /* Updating the sums of the new pulse(s) */ 298 xy = ADD32(xy, EXTEND32(X[best_id])); 299 /* We're multiplying y[j] by two so we don't have to do it here */ 300 yy = ADD16(yy, y[best_id]); 301 302 /* Only now that we've made the final choice, update y/iy */ 303 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ 304 y[best_id] += 2*s; 305 iy[best_id]++; 306 } 307 308 /* Put the original sign back */ 309 j=0; 310 do { 311 X[j] = MULT16_16(signx[j],X[j]); 312 if (signx[j] < 0) 313 iy[j] = -iy[j]; 314 } while (++j<N); 315 encode_pulses(iy, N, K, enc); 316 317 #ifdef RESYNTH 318 normalise_residual(iy, X, N, yy, gain); 319 exp_rotation(X, N, -1, B, K, spread); 320 #endif 321 322 collapse_mask = extract_collapse_mask(iy, N, B); 323 RESTORE_STACK; 324 return collapse_mask; 325 } 326 327 /** Decode pulse vector and combine the result with the pitch vector to produce 328 the final normalised signal in the current band. */ 329 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 330 ec_dec *dec, opus_val16 gain) 331 { 332 opus_val32 Ryy; 333 unsigned collapse_mask; 334 VARDECL(int, iy); 335 SAVE_STACK; 336 337 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); 338 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); 339 ALLOC(iy, N, int); 340 Ryy = decode_pulses(iy, N, K, dec); 341 normalise_residual(iy, X, N, Ryy, gain); 342 exp_rotation(X, N, -1, B, K, spread); 343 collapse_mask = extract_collapse_mask(iy, N, B); 344 RESTORE_STACK; 345 return collapse_mask; 346 } 347 348 #ifndef OVERRIDE_renormalise_vector 349 void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) 350 { 351 int i; 352 #ifdef FIXED_POINT 353 int k; 354 #endif 355 opus_val32 E; 356 opus_val16 g; 357 opus_val32 t; 358 celt_norm *xptr; 359 E = EPSILON + celt_inner_prod(X, X, N, arch); 360 #ifdef FIXED_POINT 361 k = celt_ilog2(E)>>1; 362 #endif 363 t = VSHR32(E, 2*(k-7)); 364 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); 365 366 xptr = X; 367 for (i=0;i<N;i++) 368 { 369 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); 370 xptr++; 371 } 372 /*return celt_sqrt(E);*/ 373 } 374 #endif /* OVERRIDE_renormalise_vector */ 375 376 int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) 377 { 378 int i; 379 int itheta; 380 opus_val16 mid, side; 381 opus_val32 Emid, Eside; 382 383 Emid = Eside = EPSILON; 384 if (stereo) 385 { 386 for (i=0;i<N;i++) 387 { 388 celt_norm m, s; 389 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); 390 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); 391 Emid = MAC16_16(Emid, m, m); 392 Eside = MAC16_16(Eside, s, s); 393 } 394 } else { 395 Emid += celt_inner_prod(X, X, N, arch); 396 Eside += celt_inner_prod(Y, Y, N, arch); 397 } 398 mid = celt_sqrt(Emid); 399 side = celt_sqrt(Eside); 400 #ifdef FIXED_POINT 401 /* 0.63662 = 2/pi */ 402 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); 403 #else 404 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); 405 #endif 406 407 return itheta; 408 } 409