Home | History | Annotate | Download | only in libFLAC
      1 /* libFLAC - Free Lossless Audio Codec library
      2  * Copyright (C) 2000-2009  Josh Coalson
      3  * Copyright (C) 2011-2016  Xiph.Org Foundation
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *
      9  * - Redistributions of source code must retain the above copyright
     10  * notice, this list of conditions and the following disclaimer.
     11  *
     12  * - Redistributions in binary form must reproduce the above copyright
     13  * notice, this list of conditions and the following disclaimer in the
     14  * documentation and/or other materials provided with the distribution.
     15  *
     16  * - Neither the name of the Xiph.org Foundation nor the names of its
     17  * contributors may be used to endorse or promote products derived from
     18  * this software without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     27  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     28  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     29  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 #ifdef HAVE_CONFIG_H
     34 #  include <config.h>
     35 #endif
     36 
     37 #include <math.h>
     38 
     39 #include "FLAC/assert.h"
     40 #include "FLAC/format.h"
     41 #include "share/compat.h"
     42 #include "private/bitmath.h"
     43 #include "private/lpc.h"
     44 #include "private/macros.h"
     45 #if defined DEBUG || defined FLAC__OVERFLOW_DETECT || defined FLAC__OVERFLOW_DETECT_VERBOSE
     46 #include <stdio.h>
     47 #endif
     48 
     49 /* OPT: #undef'ing this may improve the speed on some architectures */
     50 #define FLAC__LPC_UNROLLED_FILTER_LOOPS
     51 
     52 #ifndef FLAC__INTEGER_ONLY_LIBRARY
     53 
     54 #if defined(_MSC_VER) && (_MSC_VER < 1800)
     55 #include <float.h>
     56 static inline long int lround(double x) {
     57 	return (long)(x + _copysign(0.5, x));
     58 }
     59 #elif !defined(HAVE_LROUND) && defined(__GNUC__)
     60 static inline long int lround(double x) {
     61 	return (long)(x + __builtin_copysign(0.5, x));
     62 }
     63 /* If this fails, we are in the presence of a mid 90's compiler, move along... */
     64 #endif
     65 
     66 void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len)
     67 {
     68 	unsigned i;
     69 	for(i = 0; i < data_len; i++)
     70 		out[i] = in[i] * window[i];
     71 }
     72 
     73 void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
     74 {
     75 	/* a readable, but slower, version */
     76 #if 0
     77 	FLAC__real d;
     78 	unsigned i;
     79 
     80 	FLAC__ASSERT(lag > 0);
     81 	FLAC__ASSERT(lag <= data_len);
     82 
     83 	/*
     84 	 * Technically we should subtract the mean first like so:
     85 	 *   for(i = 0; i < data_len; i++)
     86 	 *     data[i] -= mean;
     87 	 * but it appears not to make enough of a difference to matter, and
     88 	 * most signals are already closely centered around zero
     89 	 */
     90 	while(lag--) {
     91 		for(i = lag, d = 0.0; i < data_len; i++)
     92 			d += data[i] * data[i - lag];
     93 		autoc[lag] = d;
     94 	}
     95 #endif
     96 
     97 	/*
     98 	 * this version tends to run faster because of better data locality
     99 	 * ('data_len' is usually much larger than 'lag')
    100 	 */
    101 	FLAC__real d;
    102 	unsigned sample, coeff;
    103 	const unsigned limit = data_len - lag;
    104 
    105 	FLAC__ASSERT(lag > 0);
    106 	FLAC__ASSERT(lag <= data_len);
    107 
    108 	for(coeff = 0; coeff < lag; coeff++)
    109 		autoc[coeff] = 0.0;
    110 	for(sample = 0; sample <= limit; sample++) {
    111 		d = data[sample];
    112 		for(coeff = 0; coeff < lag; coeff++)
    113 			autoc[coeff] += d * data[sample+coeff];
    114 	}
    115 	for(; sample < data_len; sample++) {
    116 		d = data[sample];
    117 		for(coeff = 0; coeff < data_len - sample; coeff++)
    118 			autoc[coeff] += d * data[sample+coeff];
    119 	}
    120 }
    121 
    122 void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], double error[])
    123 {
    124 	unsigned i, j;
    125 	double r, err, lpc[FLAC__MAX_LPC_ORDER];
    126 
    127 	FLAC__ASSERT(0 != max_order);
    128 	FLAC__ASSERT(0 < *max_order);
    129 	FLAC__ASSERT(*max_order <= FLAC__MAX_LPC_ORDER);
    130 	FLAC__ASSERT(autoc[0] != 0.0);
    131 
    132 	err = autoc[0];
    133 
    134 	for(i = 0; i < *max_order; i++) {
    135 		/* Sum up this iteration's reflection coefficient. */
    136 		r = -autoc[i+1];
    137 		for(j = 0; j < i; j++)
    138 			r -= lpc[j] * autoc[i-j];
    139 		r /= err;
    140 
    141 		/* Update LPC coefficients and total error. */
    142 		lpc[i]=r;
    143 		for(j = 0; j < (i>>1); j++) {
    144 			double tmp = lpc[j];
    145 			lpc[j] += r * lpc[i-1-j];
    146 			lpc[i-1-j] += r * tmp;
    147 		}
    148 		if(i & 1)
    149 			lpc[j] += lpc[j] * r;
    150 
    151 		err *= (1.0 - r * r);
    152 
    153 		/* save this order */
    154 		for(j = 0; j <= i; j++)
    155 			lp_coeff[i][j] = (FLAC__real)(-lpc[j]); /* negate FIR filter coeff to get predictor coeff */
    156 		error[i] = err;
    157 
    158 		/* see SF bug https://sourceforge.net/p/flac/bugs/234/ */
    159 		if(err == 0.0) {
    160 			*max_order = i+1;
    161 			return;
    162 		}
    163 	}
    164 }
    165 
    166 int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift)
    167 {
    168 	unsigned i;
    169 	double cmax;
    170 	FLAC__int32 qmax, qmin;
    171 
    172 	FLAC__ASSERT(precision > 0);
    173 	FLAC__ASSERT(precision >= FLAC__MIN_QLP_COEFF_PRECISION);
    174 
    175 	/* drop one bit for the sign; from here on out we consider only |lp_coeff[i]| */
    176 	precision--;
    177 	qmax = 1 << precision;
    178 	qmin = -qmax;
    179 	qmax--;
    180 
    181 	/* calc cmax = max( |lp_coeff[i]| ) */
    182 	cmax = 0.0;
    183 	for(i = 0; i < order; i++) {
    184 		const double d = fabs(lp_coeff[i]);
    185 		if(d > cmax)
    186 			cmax = d;
    187 	}
    188 
    189 	if(cmax <= 0.0) {
    190 		/* => coefficients are all 0, which means our constant-detect didn't work */
    191 		return 2;
    192 	}
    193 	else {
    194 		const int max_shiftlimit = (1 << (FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN-1)) - 1;
    195 		const int min_shiftlimit = -max_shiftlimit - 1;
    196 		int log2cmax;
    197 
    198 		(void)frexp(cmax, &log2cmax);
    199 		log2cmax--;
    200 		*shift = (int)precision - log2cmax - 1;
    201 
    202 		if(*shift > max_shiftlimit)
    203 			*shift = max_shiftlimit;
    204 		else if(*shift < min_shiftlimit)
    205 			return 1;
    206 	}
    207 
    208 	if(*shift >= 0) {
    209 		double error = 0.0;
    210 		FLAC__int32 q;
    211 		for(i = 0; i < order; i++) {
    212 			error += lp_coeff[i] * (1 << *shift);
    213 			q = lround(error);
    214 
    215 #ifdef FLAC__OVERFLOW_DETECT
    216 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
    217 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
    218 			else if(q < qmin)
    219 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
    220 #endif
    221 			if(q > qmax)
    222 				q = qmax;
    223 			else if(q < qmin)
    224 				q = qmin;
    225 			error -= q;
    226 			qlp_coeff[i] = q;
    227 		}
    228 	}
    229 	/* negative shift is very rare but due to design flaw, negative shift is
    230 	 * not allowed in the decoder, so it must be handled specially by scaling
    231 	 * down coeffs
    232 	 */
    233 	else {
    234 		const int nshift = -(*shift);
    235 		double error = 0.0;
    236 		FLAC__int32 q;
    237 #ifdef DEBUG
    238 		fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift=%d order=%u cmax=%f\n", *shift, order, cmax);
    239 #endif
    240 		for(i = 0; i < order; i++) {
    241 			error += lp_coeff[i] / (1 << nshift);
    242 			q = lround(error);
    243 #ifdef FLAC__OVERFLOW_DETECT
    244 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
    245 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
    246 			else if(q < qmin)
    247 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
    248 #endif
    249 			if(q > qmax)
    250 				q = qmax;
    251 			else if(q < qmin)
    252 				q = qmin;
    253 			error -= q;
    254 			qlp_coeff[i] = q;
    255 		}
    256 		*shift = 0;
    257 	}
    258 
    259 	return 0;
    260 }
    261 
    262 #if defined(_MSC_VER)
    263 // silence MSVC warnings about __restrict modifier
    264 #pragma warning ( disable : 4028 )
    265 #endif
    266 
    267 void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
    268 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    269 {
    270 	FLAC__int64 sumo;
    271 	unsigned i, j;
    272 	FLAC__int32 sum;
    273 	const FLAC__int32 *history;
    274 
    275 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    276 	fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    277 	for(i=0;i<order;i++)
    278 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    279 	fprintf(stderr,"\n");
    280 #endif
    281 	FLAC__ASSERT(order > 0);
    282 
    283 	for(i = 0; i < data_len; i++) {
    284 		sumo = 0;
    285 		sum = 0;
    286 		history = data;
    287 		for(j = 0; j < order; j++) {
    288 			sum += qlp_coeff[j] * (*(--history));
    289 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
    290 			if(sumo > 2147483647ll || sumo < -2147483648ll)
    291 				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
    292 		}
    293 		*(residual++) = *(data++) - (sum >> lp_quantization);
    294 	}
    295 
    296 	/* Here's a slower but clearer version:
    297 	for(i = 0; i < data_len; i++) {
    298 		sum = 0;
    299 		for(j = 0; j < order; j++)
    300 			sum += qlp_coeff[j] * data[i-j-1];
    301 		residual[i] = data[i] - (sum >> lp_quantization);
    302 	}
    303 	*/
    304 }
    305 #else /* fully unrolled version for normal use */
    306 {
    307 	int i;
    308 	FLAC__int32 sum;
    309 
    310 	FLAC__ASSERT(order > 0);
    311 	FLAC__ASSERT(order <= 32);
    312 
    313 	/*
    314 	 * We do unique versions up to 12th order since that's the subset limit.
    315 	 * Also they are roughly ordered to match frequency of occurrence to
    316 	 * minimize branching.
    317 	 */
    318 	if(order <= 12) {
    319 		if(order > 8) {
    320 			if(order > 10) {
    321 				if(order == 12) {
    322 					for(i = 0; i < (int)data_len; i++) {
    323 						sum = 0;
    324 						sum += qlp_coeff[11] * data[i-12];
    325 						sum += qlp_coeff[10] * data[i-11];
    326 						sum += qlp_coeff[9] * data[i-10];
    327 						sum += qlp_coeff[8] * data[i-9];
    328 						sum += qlp_coeff[7] * data[i-8];
    329 						sum += qlp_coeff[6] * data[i-7];
    330 						sum += qlp_coeff[5] * data[i-6];
    331 						sum += qlp_coeff[4] * data[i-5];
    332 						sum += qlp_coeff[3] * data[i-4];
    333 						sum += qlp_coeff[2] * data[i-3];
    334 						sum += qlp_coeff[1] * data[i-2];
    335 						sum += qlp_coeff[0] * data[i-1];
    336 						residual[i] = data[i] - (sum >> lp_quantization);
    337 					}
    338 				}
    339 				else { /* order == 11 */
    340 					for(i = 0; i < (int)data_len; i++) {
    341 						sum = 0;
    342 						sum += qlp_coeff[10] * data[i-11];
    343 						sum += qlp_coeff[9] * data[i-10];
    344 						sum += qlp_coeff[8] * data[i-9];
    345 						sum += qlp_coeff[7] * data[i-8];
    346 						sum += qlp_coeff[6] * data[i-7];
    347 						sum += qlp_coeff[5] * data[i-6];
    348 						sum += qlp_coeff[4] * data[i-5];
    349 						sum += qlp_coeff[3] * data[i-4];
    350 						sum += qlp_coeff[2] * data[i-3];
    351 						sum += qlp_coeff[1] * data[i-2];
    352 						sum += qlp_coeff[0] * data[i-1];
    353 						residual[i] = data[i] - (sum >> lp_quantization);
    354 					}
    355 				}
    356 			}
    357 			else {
    358 				if(order == 10) {
    359 					for(i = 0; i < (int)data_len; i++) {
    360 						sum = 0;
    361 						sum += qlp_coeff[9] * data[i-10];
    362 						sum += qlp_coeff[8] * data[i-9];
    363 						sum += qlp_coeff[7] * data[i-8];
    364 						sum += qlp_coeff[6] * data[i-7];
    365 						sum += qlp_coeff[5] * data[i-6];
    366 						sum += qlp_coeff[4] * data[i-5];
    367 						sum += qlp_coeff[3] * data[i-4];
    368 						sum += qlp_coeff[2] * data[i-3];
    369 						sum += qlp_coeff[1] * data[i-2];
    370 						sum += qlp_coeff[0] * data[i-1];
    371 						residual[i] = data[i] - (sum >> lp_quantization);
    372 					}
    373 				}
    374 				else { /* order == 9 */
    375 					for(i = 0; i < (int)data_len; i++) {
    376 						sum = 0;
    377 						sum += qlp_coeff[8] * data[i-9];
    378 						sum += qlp_coeff[7] * data[i-8];
    379 						sum += qlp_coeff[6] * data[i-7];
    380 						sum += qlp_coeff[5] * data[i-6];
    381 						sum += qlp_coeff[4] * data[i-5];
    382 						sum += qlp_coeff[3] * data[i-4];
    383 						sum += qlp_coeff[2] * data[i-3];
    384 						sum += qlp_coeff[1] * data[i-2];
    385 						sum += qlp_coeff[0] * data[i-1];
    386 						residual[i] = data[i] - (sum >> lp_quantization);
    387 					}
    388 				}
    389 			}
    390 		}
    391 		else if(order > 4) {
    392 			if(order > 6) {
    393 				if(order == 8) {
    394 					for(i = 0; i < (int)data_len; i++) {
    395 						sum = 0;
    396 						sum += qlp_coeff[7] * data[i-8];
    397 						sum += qlp_coeff[6] * data[i-7];
    398 						sum += qlp_coeff[5] * data[i-6];
    399 						sum += qlp_coeff[4] * data[i-5];
    400 						sum += qlp_coeff[3] * data[i-4];
    401 						sum += qlp_coeff[2] * data[i-3];
    402 						sum += qlp_coeff[1] * data[i-2];
    403 						sum += qlp_coeff[0] * data[i-1];
    404 						residual[i] = data[i] - (sum >> lp_quantization);
    405 					}
    406 				}
    407 				else { /* order == 7 */
    408 					for(i = 0; i < (int)data_len; i++) {
    409 						sum = 0;
    410 						sum += qlp_coeff[6] * data[i-7];
    411 						sum += qlp_coeff[5] * data[i-6];
    412 						sum += qlp_coeff[4] * data[i-5];
    413 						sum += qlp_coeff[3] * data[i-4];
    414 						sum += qlp_coeff[2] * data[i-3];
    415 						sum += qlp_coeff[1] * data[i-2];
    416 						sum += qlp_coeff[0] * data[i-1];
    417 						residual[i] = data[i] - (sum >> lp_quantization);
    418 					}
    419 				}
    420 			}
    421 			else {
    422 				if(order == 6) {
    423 					for(i = 0; i < (int)data_len; i++) {
    424 						sum = 0;
    425 						sum += qlp_coeff[5] * data[i-6];
    426 						sum += qlp_coeff[4] * data[i-5];
    427 						sum += qlp_coeff[3] * data[i-4];
    428 						sum += qlp_coeff[2] * data[i-3];
    429 						sum += qlp_coeff[1] * data[i-2];
    430 						sum += qlp_coeff[0] * data[i-1];
    431 						residual[i] = data[i] - (sum >> lp_quantization);
    432 					}
    433 				}
    434 				else { /* order == 5 */
    435 					for(i = 0; i < (int)data_len; i++) {
    436 						sum = 0;
    437 						sum += qlp_coeff[4] * data[i-5];
    438 						sum += qlp_coeff[3] * data[i-4];
    439 						sum += qlp_coeff[2] * data[i-3];
    440 						sum += qlp_coeff[1] * data[i-2];
    441 						sum += qlp_coeff[0] * data[i-1];
    442 						residual[i] = data[i] - (sum >> lp_quantization);
    443 					}
    444 				}
    445 			}
    446 		}
    447 		else {
    448 			if(order > 2) {
    449 				if(order == 4) {
    450 					for(i = 0; i < (int)data_len; i++) {
    451 						sum = 0;
    452 						sum += qlp_coeff[3] * data[i-4];
    453 						sum += qlp_coeff[2] * data[i-3];
    454 						sum += qlp_coeff[1] * data[i-2];
    455 						sum += qlp_coeff[0] * data[i-1];
    456 						residual[i] = data[i] - (sum >> lp_quantization);
    457 					}
    458 				}
    459 				else { /* order == 3 */
    460 					for(i = 0; i < (int)data_len; i++) {
    461 						sum = 0;
    462 						sum += qlp_coeff[2] * data[i-3];
    463 						sum += qlp_coeff[1] * data[i-2];
    464 						sum += qlp_coeff[0] * data[i-1];
    465 						residual[i] = data[i] - (sum >> lp_quantization);
    466 					}
    467 				}
    468 			}
    469 			else {
    470 				if(order == 2) {
    471 					for(i = 0; i < (int)data_len; i++) {
    472 						sum = 0;
    473 						sum += qlp_coeff[1] * data[i-2];
    474 						sum += qlp_coeff[0] * data[i-1];
    475 						residual[i] = data[i] - (sum >> lp_quantization);
    476 					}
    477 				}
    478 				else { /* order == 1 */
    479 					for(i = 0; i < (int)data_len; i++)
    480 						residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
    481 				}
    482 			}
    483 		}
    484 	}
    485 	else { /* order > 12 */
    486 		for(i = 0; i < (int)data_len; i++) {
    487 			sum = 0;
    488 			switch(order) {
    489 				case 32: sum += qlp_coeff[31] * data[i-32];
    490 				case 31: sum += qlp_coeff[30] * data[i-31];
    491 				case 30: sum += qlp_coeff[29] * data[i-30];
    492 				case 29: sum += qlp_coeff[28] * data[i-29];
    493 				case 28: sum += qlp_coeff[27] * data[i-28];
    494 				case 27: sum += qlp_coeff[26] * data[i-27];
    495 				case 26: sum += qlp_coeff[25] * data[i-26];
    496 				case 25: sum += qlp_coeff[24] * data[i-25];
    497 				case 24: sum += qlp_coeff[23] * data[i-24];
    498 				case 23: sum += qlp_coeff[22] * data[i-23];
    499 				case 22: sum += qlp_coeff[21] * data[i-22];
    500 				case 21: sum += qlp_coeff[20] * data[i-21];
    501 				case 20: sum += qlp_coeff[19] * data[i-20];
    502 				case 19: sum += qlp_coeff[18] * data[i-19];
    503 				case 18: sum += qlp_coeff[17] * data[i-18];
    504 				case 17: sum += qlp_coeff[16] * data[i-17];
    505 				case 16: sum += qlp_coeff[15] * data[i-16];
    506 				case 15: sum += qlp_coeff[14] * data[i-15];
    507 				case 14: sum += qlp_coeff[13] * data[i-14];
    508 				case 13: sum += qlp_coeff[12] * data[i-13];
    509 				         sum += qlp_coeff[11] * data[i-12];
    510 				         sum += qlp_coeff[10] * data[i-11];
    511 				         sum += qlp_coeff[ 9] * data[i-10];
    512 				         sum += qlp_coeff[ 8] * data[i- 9];
    513 				         sum += qlp_coeff[ 7] * data[i- 8];
    514 				         sum += qlp_coeff[ 6] * data[i- 7];
    515 				         sum += qlp_coeff[ 5] * data[i- 6];
    516 				         sum += qlp_coeff[ 4] * data[i- 5];
    517 				         sum += qlp_coeff[ 3] * data[i- 4];
    518 				         sum += qlp_coeff[ 2] * data[i- 3];
    519 				         sum += qlp_coeff[ 1] * data[i- 2];
    520 				         sum += qlp_coeff[ 0] * data[i- 1];
    521 			}
    522 			residual[i] = data[i] - (sum >> lp_quantization);
    523 		}
    524 	}
    525 }
    526 #endif
    527 
    528 void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
    529 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    530 {
    531 	unsigned i, j;
    532 	FLAC__int64 sum;
    533 	const FLAC__int32 *history;
    534 
    535 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    536 	fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    537 	for(i=0;i<order;i++)
    538 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    539 	fprintf(stderr,"\n");
    540 #endif
    541 	FLAC__ASSERT(order > 0);
    542 
    543 	for(i = 0; i < data_len; i++) {
    544 		sum = 0;
    545 		history = data;
    546 		for(j = 0; j < order; j++)
    547 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
    548 		if(FLAC__bitmath_silog2(sum >> lp_quantization) > 32) {
    549 			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
    550 			break;
    551 		}
    552 		if(FLAC__bitmath_silog2((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
    553 			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%" PRId64 ", residual=%" PRId64 "\n", i, *data, (int64_t)(sum >> lp_quantization), ((FLAC__int64)(*data) - (sum >> lp_quantization)));
    554 			break;
    555 		}
    556 		*(residual++) = *(data++) - (FLAC__int32)(sum >> lp_quantization);
    557 	}
    558 }
    559 #else /* fully unrolled version for normal use */
    560 {
    561 	int i;
    562 	FLAC__int64 sum;
    563 
    564 	FLAC__ASSERT(order > 0);
    565 	FLAC__ASSERT(order <= 32);
    566 
    567 	/*
    568 	 * We do unique versions up to 12th order since that's the subset limit.
    569 	 * Also they are roughly ordered to match frequency of occurrence to
    570 	 * minimize branching.
    571 	 */
    572 	if(order <= 12) {
    573 		if(order > 8) {
    574 			if(order > 10) {
    575 				if(order == 12) {
    576 					for(i = 0; i < (int)data_len; i++) {
    577 						sum = 0;
    578 						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
    579 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    580 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    581 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    582 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    583 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    584 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    585 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    586 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    587 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    588 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    589 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    590 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    591 					}
    592 				}
    593 				else { /* order == 11 */
    594 					for(i = 0; i < (int)data_len; i++) {
    595 						sum = 0;
    596 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    597 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    598 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    599 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    600 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    601 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    602 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    603 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    604 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    605 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    606 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    607 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    608 					}
    609 				}
    610 			}
    611 			else {
    612 				if(order == 10) {
    613 					for(i = 0; i < (int)data_len; i++) {
    614 						sum = 0;
    615 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    616 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    617 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    618 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    619 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    620 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    621 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    622 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    623 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    624 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    625 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    626 					}
    627 				}
    628 				else { /* order == 9 */
    629 					for(i = 0; i < (int)data_len; i++) {
    630 						sum = 0;
    631 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    632 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    633 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    634 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    635 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    636 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    637 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    638 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    639 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    640 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    641 					}
    642 				}
    643 			}
    644 		}
    645 		else if(order > 4) {
    646 			if(order > 6) {
    647 				if(order == 8) {
    648 					for(i = 0; i < (int)data_len; i++) {
    649 						sum = 0;
    650 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    651 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    652 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    653 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    654 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    655 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    656 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    657 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    658 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    659 					}
    660 				}
    661 				else { /* order == 7 */
    662 					for(i = 0; i < (int)data_len; i++) {
    663 						sum = 0;
    664 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    665 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    666 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    667 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    668 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    669 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    670 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    671 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    672 					}
    673 				}
    674 			}
    675 			else {
    676 				if(order == 6) {
    677 					for(i = 0; i < (int)data_len; i++) {
    678 						sum = 0;
    679 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    680 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    681 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    682 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    683 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    684 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    685 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    686 					}
    687 				}
    688 				else { /* order == 5 */
    689 					for(i = 0; i < (int)data_len; i++) {
    690 						sum = 0;
    691 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    692 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    693 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    694 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    695 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    696 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    697 					}
    698 				}
    699 			}
    700 		}
    701 		else {
    702 			if(order > 2) {
    703 				if(order == 4) {
    704 					for(i = 0; i < (int)data_len; i++) {
    705 						sum = 0;
    706 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    707 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    708 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    709 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    710 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    711 					}
    712 				}
    713 				else { /* order == 3 */
    714 					for(i = 0; i < (int)data_len; i++) {
    715 						sum = 0;
    716 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    717 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    718 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    719 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    720 					}
    721 				}
    722 			}
    723 			else {
    724 				if(order == 2) {
    725 					for(i = 0; i < (int)data_len; i++) {
    726 						sum = 0;
    727 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    728 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    729 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    730 					}
    731 				}
    732 				else { /* order == 1 */
    733 					for(i = 0; i < (int)data_len; i++)
    734 						residual[i] = data[i] - (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
    735 				}
    736 			}
    737 		}
    738 	}
    739 	else { /* order > 12 */
    740 		for(i = 0; i < (int)data_len; i++) {
    741 			sum = 0;
    742 			switch(order) {
    743 				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
    744 				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
    745 				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
    746 				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
    747 				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
    748 				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
    749 				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
    750 				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
    751 				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
    752 				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
    753 				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
    754 				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
    755 				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
    756 				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
    757 				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
    758 				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
    759 				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
    760 				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
    761 				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
    762 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
    763 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
    764 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    765 				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
    766 				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
    767 				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
    768 				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
    769 				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
    770 				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
    771 				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
    772 				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
    773 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
    774 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
    775 			}
    776 			residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    777 		}
    778 	}
    779 }
    780 #endif
    781 
    782 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
    783 
    784 void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
    785 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    786 {
    787 	FLAC__int64 sumo;
    788 	unsigned i, j;
    789 	FLAC__int32 sum;
    790 	const FLAC__int32 *r = residual, *history;
    791 
    792 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    793 	fprintf(stderr,"FLAC__lpc_restore_signal: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    794 	for(i=0;i<order;i++)
    795 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    796 	fprintf(stderr,"\n");
    797 #endif
    798 	FLAC__ASSERT(order > 0);
    799 
    800 	for(i = 0; i < data_len; i++) {
    801 		sumo = 0;
    802 		sum = 0;
    803 		history = data;
    804 		for(j = 0; j < order; j++) {
    805 			sum += qlp_coeff[j] * (*(--history));
    806 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
    807 			if(sumo > 2147483647ll || sumo < -2147483648ll)
    808 				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
    809 		}
    810 		*(data++) = *(r++) + (sum >> lp_quantization);
    811 	}
    812 
    813 	/* Here's a slower but clearer version:
    814 	for(i = 0; i < data_len; i++) {
    815 		sum = 0;
    816 		for(j = 0; j < order; j++)
    817 			sum += qlp_coeff[j] * data[i-j-1];
    818 		data[i] = residual[i] + (sum >> lp_quantization);
    819 	}
    820 	*/
    821 }
    822 #else /* fully unrolled version for normal use */
    823 {
    824 	int i;
    825 	FLAC__int32 sum;
    826 
    827 	FLAC__ASSERT(order > 0);
    828 	FLAC__ASSERT(order <= 32);
    829 
    830 	/*
    831 	 * We do unique versions up to 12th order since that's the subset limit.
    832 	 * Also they are roughly ordered to match frequency of occurrence to
    833 	 * minimize branching.
    834 	 */
    835 	if(order <= 12) {
    836 		if(order > 8) {
    837 			if(order > 10) {
    838 				if(order == 12) {
    839 					for(i = 0; i < (int)data_len; i++) {
    840 						sum = 0;
    841 						sum += qlp_coeff[11] * data[i-12];
    842 						sum += qlp_coeff[10] * data[i-11];
    843 						sum += qlp_coeff[9] * data[i-10];
    844 						sum += qlp_coeff[8] * data[i-9];
    845 						sum += qlp_coeff[7] * data[i-8];
    846 						sum += qlp_coeff[6] * data[i-7];
    847 						sum += qlp_coeff[5] * data[i-6];
    848 						sum += qlp_coeff[4] * data[i-5];
    849 						sum += qlp_coeff[3] * data[i-4];
    850 						sum += qlp_coeff[2] * data[i-3];
    851 						sum += qlp_coeff[1] * data[i-2];
    852 						sum += qlp_coeff[0] * data[i-1];
    853 						data[i] = residual[i] + (sum >> lp_quantization);
    854 					}
    855 				}
    856 				else { /* order == 11 */
    857 					for(i = 0; i < (int)data_len; i++) {
    858 						sum = 0;
    859 						sum += qlp_coeff[10] * data[i-11];
    860 						sum += qlp_coeff[9] * data[i-10];
    861 						sum += qlp_coeff[8] * data[i-9];
    862 						sum += qlp_coeff[7] * data[i-8];
    863 						sum += qlp_coeff[6] * data[i-7];
    864 						sum += qlp_coeff[5] * data[i-6];
    865 						sum += qlp_coeff[4] * data[i-5];
    866 						sum += qlp_coeff[3] * data[i-4];
    867 						sum += qlp_coeff[2] * data[i-3];
    868 						sum += qlp_coeff[1] * data[i-2];
    869 						sum += qlp_coeff[0] * data[i-1];
    870 						data[i] = residual[i] + (sum >> lp_quantization);
    871 					}
    872 				}
    873 			}
    874 			else {
    875 				if(order == 10) {
    876 					for(i = 0; i < (int)data_len; i++) {
    877 						sum = 0;
    878 						sum += qlp_coeff[9] * data[i-10];
    879 						sum += qlp_coeff[8] * data[i-9];
    880 						sum += qlp_coeff[7] * data[i-8];
    881 						sum += qlp_coeff[6] * data[i-7];
    882 						sum += qlp_coeff[5] * data[i-6];
    883 						sum += qlp_coeff[4] * data[i-5];
    884 						sum += qlp_coeff[3] * data[i-4];
    885 						sum += qlp_coeff[2] * data[i-3];
    886 						sum += qlp_coeff[1] * data[i-2];
    887 						sum += qlp_coeff[0] * data[i-1];
    888 						data[i] = residual[i] + (sum >> lp_quantization);
    889 					}
    890 				}
    891 				else { /* order == 9 */
    892 					for(i = 0; i < (int)data_len; i++) {
    893 						sum = 0;
    894 						sum += qlp_coeff[8] * data[i-9];
    895 						sum += qlp_coeff[7] * data[i-8];
    896 						sum += qlp_coeff[6] * data[i-7];
    897 						sum += qlp_coeff[5] * data[i-6];
    898 						sum += qlp_coeff[4] * data[i-5];
    899 						sum += qlp_coeff[3] * data[i-4];
    900 						sum += qlp_coeff[2] * data[i-3];
    901 						sum += qlp_coeff[1] * data[i-2];
    902 						sum += qlp_coeff[0] * data[i-1];
    903 						data[i] = residual[i] + (sum >> lp_quantization);
    904 					}
    905 				}
    906 			}
    907 		}
    908 		else if(order > 4) {
    909 			if(order > 6) {
    910 				if(order == 8) {
    911 					for(i = 0; i < (int)data_len; i++) {
    912 						sum = 0;
    913 						sum += qlp_coeff[7] * data[i-8];
    914 						sum += qlp_coeff[6] * data[i-7];
    915 						sum += qlp_coeff[5] * data[i-6];
    916 						sum += qlp_coeff[4] * data[i-5];
    917 						sum += qlp_coeff[3] * data[i-4];
    918 						sum += qlp_coeff[2] * data[i-3];
    919 						sum += qlp_coeff[1] * data[i-2];
    920 						sum += qlp_coeff[0] * data[i-1];
    921 						data[i] = residual[i] + (sum >> lp_quantization);
    922 					}
    923 				}
    924 				else { /* order == 7 */
    925 					for(i = 0; i < (int)data_len; i++) {
    926 						sum = 0;
    927 						sum += qlp_coeff[6] * data[i-7];
    928 						sum += qlp_coeff[5] * data[i-6];
    929 						sum += qlp_coeff[4] * data[i-5];
    930 						sum += qlp_coeff[3] * data[i-4];
    931 						sum += qlp_coeff[2] * data[i-3];
    932 						sum += qlp_coeff[1] * data[i-2];
    933 						sum += qlp_coeff[0] * data[i-1];
    934 						data[i] = residual[i] + (sum >> lp_quantization);
    935 					}
    936 				}
    937 			}
    938 			else {
    939 				if(order == 6) {
    940 					for(i = 0; i < (int)data_len; i++) {
    941 						sum = 0;
    942 						sum += qlp_coeff[5] * data[i-6];
    943 						sum += qlp_coeff[4] * data[i-5];
    944 						sum += qlp_coeff[3] * data[i-4];
    945 						sum += qlp_coeff[2] * data[i-3];
    946 						sum += qlp_coeff[1] * data[i-2];
    947 						sum += qlp_coeff[0] * data[i-1];
    948 						data[i] = residual[i] + (sum >> lp_quantization);
    949 					}
    950 				}
    951 				else { /* order == 5 */
    952 					for(i = 0; i < (int)data_len; i++) {
    953 						sum = 0;
    954 						sum += qlp_coeff[4] * data[i-5];
    955 						sum += qlp_coeff[3] * data[i-4];
    956 						sum += qlp_coeff[2] * data[i-3];
    957 						sum += qlp_coeff[1] * data[i-2];
    958 						sum += qlp_coeff[0] * data[i-1];
    959 						data[i] = residual[i] + (sum >> lp_quantization);
    960 					}
    961 				}
    962 			}
    963 		}
    964 		else {
    965 			if(order > 2) {
    966 				if(order == 4) {
    967 					for(i = 0; i < (int)data_len; i++) {
    968 						sum = 0;
    969 						sum += qlp_coeff[3] * data[i-4];
    970 						sum += qlp_coeff[2] * data[i-3];
    971 						sum += qlp_coeff[1] * data[i-2];
    972 						sum += qlp_coeff[0] * data[i-1];
    973 						data[i] = residual[i] + (sum >> lp_quantization);
    974 					}
    975 				}
    976 				else { /* order == 3 */
    977 					for(i = 0; i < (int)data_len; i++) {
    978 						sum = 0;
    979 						sum += qlp_coeff[2] * data[i-3];
    980 						sum += qlp_coeff[1] * data[i-2];
    981 						sum += qlp_coeff[0] * data[i-1];
    982 						data[i] = residual[i] + (sum >> lp_quantization);
    983 					}
    984 				}
    985 			}
    986 			else {
    987 				if(order == 2) {
    988 					for(i = 0; i < (int)data_len; i++) {
    989 						sum = 0;
    990 						sum += qlp_coeff[1] * data[i-2];
    991 						sum += qlp_coeff[0] * data[i-1];
    992 						data[i] = residual[i] + (sum >> lp_quantization);
    993 					}
    994 				}
    995 				else { /* order == 1 */
    996 					for(i = 0; i < (int)data_len; i++)
    997 						data[i] = residual[i] + ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
    998 				}
    999 			}
   1000 		}
   1001 	}
   1002 	else { /* order > 12 */
   1003 		for(i = 0; i < (int)data_len; i++) {
   1004 			sum = 0;
   1005 			switch(order) {
   1006 				case 32: sum += qlp_coeff[31] * data[i-32];
   1007 				case 31: sum += qlp_coeff[30] * data[i-31];
   1008 				case 30: sum += qlp_coeff[29] * data[i-30];
   1009 				case 29: sum += qlp_coeff[28] * data[i-29];
   1010 				case 28: sum += qlp_coeff[27] * data[i-28];
   1011 				case 27: sum += qlp_coeff[26] * data[i-27];
   1012 				case 26: sum += qlp_coeff[25] * data[i-26];
   1013 				case 25: sum += qlp_coeff[24] * data[i-25];
   1014 				case 24: sum += qlp_coeff[23] * data[i-24];
   1015 				case 23: sum += qlp_coeff[22] * data[i-23];
   1016 				case 22: sum += qlp_coeff[21] * data[i-22];
   1017 				case 21: sum += qlp_coeff[20] * data[i-21];
   1018 				case 20: sum += qlp_coeff[19] * data[i-20];
   1019 				case 19: sum += qlp_coeff[18] * data[i-19];
   1020 				case 18: sum += qlp_coeff[17] * data[i-18];
   1021 				case 17: sum += qlp_coeff[16] * data[i-17];
   1022 				case 16: sum += qlp_coeff[15] * data[i-16];
   1023 				case 15: sum += qlp_coeff[14] * data[i-15];
   1024 				case 14: sum += qlp_coeff[13] * data[i-14];
   1025 				case 13: sum += qlp_coeff[12] * data[i-13];
   1026 				         sum += qlp_coeff[11] * data[i-12];
   1027 				         sum += qlp_coeff[10] * data[i-11];
   1028 				         sum += qlp_coeff[ 9] * data[i-10];
   1029 				         sum += qlp_coeff[ 8] * data[i- 9];
   1030 				         sum += qlp_coeff[ 7] * data[i- 8];
   1031 				         sum += qlp_coeff[ 6] * data[i- 7];
   1032 				         sum += qlp_coeff[ 5] * data[i- 6];
   1033 				         sum += qlp_coeff[ 4] * data[i- 5];
   1034 				         sum += qlp_coeff[ 3] * data[i- 4];
   1035 				         sum += qlp_coeff[ 2] * data[i- 3];
   1036 				         sum += qlp_coeff[ 1] * data[i- 2];
   1037 				         sum += qlp_coeff[ 0] * data[i- 1];
   1038 			}
   1039 			data[i] = residual[i] + (sum >> lp_quantization);
   1040 		}
   1041 	}
   1042 }
   1043 #endif
   1044 
   1045 void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
   1046 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
   1047 {
   1048 	unsigned i, j;
   1049 	FLAC__int64 sum;
   1050 	const FLAC__int32 *r = residual, *history;
   1051 
   1052 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
   1053 	fprintf(stderr,"FLAC__lpc_restore_signal_wide: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
   1054 	for(i=0;i<order;i++)
   1055 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
   1056 	fprintf(stderr,"\n");
   1057 #endif
   1058 	FLAC__ASSERT(order > 0);
   1059 
   1060 	for(i = 0; i < data_len; i++) {
   1061 		sum = 0;
   1062 		history = data;
   1063 		for(j = 0; j < order; j++)
   1064 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
   1065 		if(FLAC__bitmath_silog2(sum >> lp_quantization) > 32) {
   1066 			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
   1067 			break;
   1068 		}
   1069 		if(FLAC__bitmath_silog2((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
   1070 			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization)));
   1071 			break;
   1072 		}
   1073 		*(data++) = *(r++) + (FLAC__int32)(sum >> lp_quantization);
   1074 	}
   1075 }
   1076 #else /* fully unrolled version for normal use */
   1077 {
   1078 	int i;
   1079 	FLAC__int64 sum;
   1080 
   1081 	FLAC__ASSERT(order > 0);
   1082 	FLAC__ASSERT(order <= 32);
   1083 
   1084 	/*
   1085 	 * We do unique versions up to 12th order since that's the subset limit.
   1086 	 * Also they are roughly ordered to match frequency of occurrence to
   1087 	 * minimize branching.
   1088 	 */
   1089 	if(order <= 12) {
   1090 		if(order > 8) {
   1091 			if(order > 10) {
   1092 				if(order == 12) {
   1093 					for(i = 0; i < (int)data_len; i++) {
   1094 						sum = 0;
   1095 						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
   1096 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1097 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1098 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1099 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1100 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1101 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1102 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1103 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1104 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1105 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1106 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1107 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1108 					}
   1109 				}
   1110 				else { /* order == 11 */
   1111 					for(i = 0; i < (int)data_len; i++) {
   1112 						sum = 0;
   1113 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1114 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1115 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1116 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1117 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1118 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1119 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1120 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1121 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1122 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1123 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1124 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1125 					}
   1126 				}
   1127 			}
   1128 			else {
   1129 				if(order == 10) {
   1130 					for(i = 0; i < (int)data_len; i++) {
   1131 						sum = 0;
   1132 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1133 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1134 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1135 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1136 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1137 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1138 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1139 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1140 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1141 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1142 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1143 					}
   1144 				}
   1145 				else { /* order == 9 */
   1146 					for(i = 0; i < (int)data_len; i++) {
   1147 						sum = 0;
   1148 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1149 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1150 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1151 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1152 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1153 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1154 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1155 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1156 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1157 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1158 					}
   1159 				}
   1160 			}
   1161 		}
   1162 		else if(order > 4) {
   1163 			if(order > 6) {
   1164 				if(order == 8) {
   1165 					for(i = 0; i < (int)data_len; i++) {
   1166 						sum = 0;
   1167 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1168 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1169 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1170 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1171 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1172 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1173 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1174 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1175 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1176 					}
   1177 				}
   1178 				else { /* order == 7 */
   1179 					for(i = 0; i < (int)data_len; i++) {
   1180 						sum = 0;
   1181 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1182 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1183 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1184 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1185 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1186 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1187 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1188 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1189 					}
   1190 				}
   1191 			}
   1192 			else {
   1193 				if(order == 6) {
   1194 					for(i = 0; i < (int)data_len; i++) {
   1195 						sum = 0;
   1196 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1197 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1198 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1199 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1200 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1201 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1202 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1203 					}
   1204 				}
   1205 				else { /* order == 5 */
   1206 					for(i = 0; i < (int)data_len; i++) {
   1207 						sum = 0;
   1208 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1209 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1210 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1211 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1212 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1213 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1214 					}
   1215 				}
   1216 			}
   1217 		}
   1218 		else {
   1219 			if(order > 2) {
   1220 				if(order == 4) {
   1221 					for(i = 0; i < (int)data_len; i++) {
   1222 						sum = 0;
   1223 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1224 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1225 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1226 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1227 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1228 					}
   1229 				}
   1230 				else { /* order == 3 */
   1231 					for(i = 0; i < (int)data_len; i++) {
   1232 						sum = 0;
   1233 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1234 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1235 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1236 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1237 					}
   1238 				}
   1239 			}
   1240 			else {
   1241 				if(order == 2) {
   1242 					for(i = 0; i < (int)data_len; i++) {
   1243 						sum = 0;
   1244 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1245 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1246 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1247 					}
   1248 				}
   1249 				else { /* order == 1 */
   1250 					for(i = 0; i < (int)data_len; i++)
   1251 						data[i] = residual[i] + (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
   1252 				}
   1253 			}
   1254 		}
   1255 	}
   1256 	else { /* order > 12 */
   1257 		for(i = 0; i < (int)data_len; i++) {
   1258 			sum = 0;
   1259 			switch(order) {
   1260 				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
   1261 				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
   1262 				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
   1263 				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
   1264 				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
   1265 				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
   1266 				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
   1267 				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
   1268 				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
   1269 				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
   1270 				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
   1271 				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
   1272 				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
   1273 				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
   1274 				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
   1275 				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
   1276 				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
   1277 				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
   1278 				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
   1279 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
   1280 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
   1281 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1282 				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
   1283 				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
   1284 				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
   1285 				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
   1286 				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
   1287 				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
   1288 				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
   1289 				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
   1290 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
   1291 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
   1292 			}
   1293 			data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1294 		}
   1295 	}
   1296 }
   1297 #endif
   1298 
   1299 #if defined(_MSC_VER)
   1300 #pragma warning ( default : 4028 )
   1301 #endif
   1302 
   1303 #ifndef FLAC__INTEGER_ONLY_LIBRARY
   1304 
   1305 double FLAC__lpc_compute_expected_bits_per_residual_sample(double lpc_error, unsigned total_samples)
   1306 {
   1307 	double error_scale;
   1308 
   1309 	FLAC__ASSERT(total_samples > 0);
   1310 
   1311 	error_scale = 0.5 / (double)total_samples;
   1312 
   1313 	return FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error, error_scale);
   1314 }
   1315 
   1316 double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(double lpc_error, double error_scale)
   1317 {
   1318 	if(lpc_error > 0.0) {
   1319 		double bps = (double)0.5 * log(error_scale * lpc_error) / M_LN2;
   1320 		if(bps >= 0.0)
   1321 			return bps;
   1322 		else
   1323 			return 0.0;
   1324 	}
   1325 	else if(lpc_error < 0.0) { /* error should not be negative but can happen due to inadequate floating-point resolution */
   1326 		return 1e32;
   1327 	}
   1328 	else {
   1329 		return 0.0;
   1330 	}
   1331 }
   1332 
   1333 unsigned FLAC__lpc_compute_best_order(const double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order)
   1334 {
   1335 	unsigned order, indx, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
   1336 	double bits, best_bits, error_scale;
   1337 
   1338 	FLAC__ASSERT(max_order > 0);
   1339 	FLAC__ASSERT(total_samples > 0);
   1340 
   1341 	error_scale = 0.5 / (double)total_samples;
   1342 
   1343 	best_index = 0;
   1344 	best_bits = (unsigned)(-1);
   1345 
   1346 	for(indx = 0, order = 1; indx < max_order; indx++, order++) {
   1347 		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (double)(total_samples - order) + (double)(order * overhead_bits_per_order);
   1348 		if(bits < best_bits) {
   1349 			best_index = indx;
   1350 			best_bits = bits;
   1351 		}
   1352 	}
   1353 
   1354 	return best_index+1; /* +1 since indx of lpc_error[] is order-1 */
   1355 }
   1356 
   1357 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
   1358