Home | History | Annotate | Download | only in libFLAC
      1 /* libFLAC - Free Lossless Audio Codec library
      2  * Copyright (C) 2000-2009  Josh Coalson
      3  * Copyright (C) 2011-2014  Xiph.Org Foundation
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  *
      9  * - Redistributions of source code must retain the above copyright
     10  * notice, this list of conditions and the following disclaimer.
     11  *
     12  * - Redistributions in binary form must reproduce the above copyright
     13  * notice, this list of conditions and the following disclaimer in the
     14  * documentation and/or other materials provided with the distribution.
     15  *
     16  * - Neither the name of the Xiph.org Foundation nor the names of its
     17  * contributors may be used to endorse or promote products derived from
     18  * this software without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     27  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     28  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     29  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 #ifdef HAVE_CONFIG_H
     34 #  include <config.h>
     35 #endif
     36 
     37 #include <math.h>
     38 
     39 #include "FLAC/assert.h"
     40 #include "FLAC/format.h"
     41 #include "share/compat.h"
     42 #include "private/bitmath.h"
     43 #include "private/lpc.h"
     44 #include "private/macros.h"
     45 #if defined DEBUG || defined FLAC__OVERFLOW_DETECT || defined FLAC__OVERFLOW_DETECT_VERBOSE
     46 #include <stdio.h>
     47 #endif
     48 
     49 /* OPT: #undef'ing this may improve the speed on some architectures */
     50 #define FLAC__LPC_UNROLLED_FILTER_LOOPS
     51 
     52 #ifndef FLAC__INTEGER_ONLY_LIBRARY
     53 
     54 #if !defined(HAVE_LROUND)
     55 #if defined(_MSC_VER)
     56 #include <float.h>
     57 #define copysign _copysign
     58 #elif defined(__GNUC__)
     59 #define copysign __builtin_copysign
     60 #endif
     61 static inline long int lround(double x) {
     62     return (long)(x + copysign (0.5, x));
     63 }
     64 /* If this fails, we are in the presence of a mid 90's compiler, move along... */
     65 #endif
     66 
     67 void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len)
     68 {
     69 	unsigned i;
     70 	for(i = 0; i < data_len; i++)
     71 		out[i] = in[i] * window[i];
     72 }
     73 
     74 void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
     75 {
     76 	/* a readable, but slower, version */
     77 #if 0
     78 	FLAC__real d;
     79 	unsigned i;
     80 
     81 	FLAC__ASSERT(lag > 0);
     82 	FLAC__ASSERT(lag <= data_len);
     83 
     84 	/*
     85 	 * Technically we should subtract the mean first like so:
     86 	 *   for(i = 0; i < data_len; i++)
     87 	 *     data[i] -= mean;
     88 	 * but it appears not to make enough of a difference to matter, and
     89 	 * most signals are already closely centered around zero
     90 	 */
     91 	while(lag--) {
     92 		for(i = lag, d = 0.0; i < data_len; i++)
     93 			d += data[i] * data[i - lag];
     94 		autoc[lag] = d;
     95 	}
     96 #endif
     97 
     98 	/*
     99 	 * this version tends to run faster because of better data locality
    100 	 * ('data_len' is usually much larger than 'lag')
    101 	 */
    102 	FLAC__real d;
    103 	unsigned sample, coeff;
    104 	const unsigned limit = data_len - lag;
    105 
    106 	FLAC__ASSERT(lag > 0);
    107 	FLAC__ASSERT(lag <= data_len);
    108 
    109 	for(coeff = 0; coeff < lag; coeff++)
    110 		autoc[coeff] = 0.0;
    111 	for(sample = 0; sample <= limit; sample++) {
    112 		d = data[sample];
    113 		for(coeff = 0; coeff < lag; coeff++)
    114 			autoc[coeff] += d * data[sample+coeff];
    115 	}
    116 	for(; sample < data_len; sample++) {
    117 		d = data[sample];
    118 		for(coeff = 0; coeff < data_len - sample; coeff++)
    119 			autoc[coeff] += d * data[sample+coeff];
    120 	}
    121 }
    122 
    123 void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[])
    124 {
    125 	unsigned i, j;
    126 	FLAC__double r, err, lpc[FLAC__MAX_LPC_ORDER];
    127 
    128 	FLAC__ASSERT(0 != max_order);
    129 	FLAC__ASSERT(0 < *max_order);
    130 	FLAC__ASSERT(*max_order <= FLAC__MAX_LPC_ORDER);
    131 	FLAC__ASSERT(autoc[0] != 0.0);
    132 
    133 	err = autoc[0];
    134 
    135 	for(i = 0; i < *max_order; i++) {
    136 		/* Sum up this iteration's reflection coefficient. */
    137 		r = -autoc[i+1];
    138 		for(j = 0; j < i; j++)
    139 			r -= lpc[j] * autoc[i-j];
    140 		r /= err;
    141 
    142 		/* Update LPC coefficients and total error. */
    143 		lpc[i]=r;
    144 		for(j = 0; j < (i>>1); j++) {
    145 			FLAC__double tmp = lpc[j];
    146 			lpc[j] += r * lpc[i-1-j];
    147 			lpc[i-1-j] += r * tmp;
    148 		}
    149 		if(i & 1)
    150 			lpc[j] += lpc[j] * r;
    151 
    152 		err *= (1.0 - r * r);
    153 
    154 		/* save this order */
    155 		for(j = 0; j <= i; j++)
    156 			lp_coeff[i][j] = (FLAC__real)(-lpc[j]); /* negate FIR filter coeff to get predictor coeff */
    157 		error[i] = err;
    158 
    159 		/* see SF bug https://sourceforge.net/p/flac/bugs/234/ */
    160 		if(err == 0.0) {
    161 			*max_order = i+1;
    162 			return;
    163 		}
    164 	}
    165 }
    166 
    167 int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift)
    168 {
    169 	unsigned i;
    170 	FLAC__double cmax;
    171 	FLAC__int32 qmax, qmin;
    172 
    173 	FLAC__ASSERT(precision > 0);
    174 	FLAC__ASSERT(precision >= FLAC__MIN_QLP_COEFF_PRECISION);
    175 
    176 	/* drop one bit for the sign; from here on out we consider only |lp_coeff[i]| */
    177 	precision--;
    178 	qmax = 1 << precision;
    179 	qmin = -qmax;
    180 	qmax--;
    181 
    182 	/* calc cmax = max( |lp_coeff[i]| ) */
    183 	cmax = 0.0;
    184 	for(i = 0; i < order; i++) {
    185 		const FLAC__double d = fabs(lp_coeff[i]);
    186 		if(d > cmax)
    187 			cmax = d;
    188 	}
    189 
    190 	if(cmax <= 0.0) {
    191 		/* => coefficients are all 0, which means our constant-detect didn't work */
    192 		return 2;
    193 	}
    194 	else {
    195 		const int max_shiftlimit = (1 << (FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN-1)) - 1;
    196 		const int min_shiftlimit = -max_shiftlimit - 1;
    197 		int log2cmax;
    198 
    199 		(void)frexp(cmax, &log2cmax);
    200 		log2cmax--;
    201 		*shift = (int)precision - log2cmax - 1;
    202 
    203 		if(*shift > max_shiftlimit)
    204 			*shift = max_shiftlimit;
    205 		else if(*shift < min_shiftlimit)
    206 			return 1;
    207 	}
    208 
    209 	if(*shift >= 0) {
    210 		FLAC__double error = 0.0;
    211 		FLAC__int32 q;
    212 		for(i = 0; i < order; i++) {
    213 			error += lp_coeff[i] * (1 << *shift);
    214 			q = lround(error);
    215 
    216 #ifdef FLAC__OVERFLOW_DETECT
    217 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
    218 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
    219 			else if(q < qmin)
    220 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
    221 #endif
    222 			if(q > qmax)
    223 				q = qmax;
    224 			else if(q < qmin)
    225 				q = qmin;
    226 			error -= q;
    227 			qlp_coeff[i] = q;
    228 		}
    229 	}
    230 	/* negative shift is very rare but due to design flaw, negative shift is
    231 	 * a NOP in the decoder, so it must be handled specially by scaling down
    232 	 * coeffs
    233 	 */
    234 	else {
    235 		const int nshift = -(*shift);
    236 		FLAC__double error = 0.0;
    237 		FLAC__int32 q;
    238 #ifdef DEBUG
    239 		fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift=%d order=%u cmax=%f\n", *shift, order, cmax);
    240 #endif
    241 		for(i = 0; i < order; i++) {
    242 			error += lp_coeff[i] / (1 << nshift);
    243 			q = lround(error);
    244 #ifdef FLAC__OVERFLOW_DETECT
    245 			if(q > qmax+1) /* we expect q==qmax+1 occasionally due to rounding */
    246 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q>qmax %d>%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmax,*shift,cmax,precision+1,i,lp_coeff[i]);
    247 			else if(q < qmin)
    248 				fprintf(stderr,"FLAC__lpc_quantize_coefficients: quantizer overflow: q<qmin %d<%d shift=%d cmax=%f precision=%u lpc[%u]=%f\n",q,qmin,*shift,cmax,precision+1,i,lp_coeff[i]);
    249 #endif
    250 			if(q > qmax)
    251 				q = qmax;
    252 			else if(q < qmin)
    253 				q = qmin;
    254 			error -= q;
    255 			qlp_coeff[i] = q;
    256 		}
    257 		*shift = 0;
    258 	}
    259 
    260 	return 0;
    261 }
    262 
    263 #if defined(_MSC_VER)
    264 // silence MSVC warnings about __restrict modifier
    265 #pragma warning ( disable : 4028 )
    266 #endif
    267 
    268 void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
    269 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    270 {
    271 	FLAC__int64 sumo;
    272 	unsigned i, j;
    273 	FLAC__int32 sum;
    274 	const FLAC__int32 *history;
    275 
    276 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    277 	fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    278 	for(i=0;i<order;i++)
    279 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    280 	fprintf(stderr,"\n");
    281 #endif
    282 	FLAC__ASSERT(order > 0);
    283 
    284 	for(i = 0; i < data_len; i++) {
    285 		sumo = 0;
    286 		sum = 0;
    287 		history = data;
    288 		for(j = 0; j < order; j++) {
    289 			sum += qlp_coeff[j] * (*(--history));
    290 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
    291 			if(sumo > 2147483647ll || sumo < -2147483648ll)
    292 				fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
    293 		}
    294 		*(residual++) = *(data++) - (sum >> lp_quantization);
    295 	}
    296 
    297 	/* Here's a slower but clearer version:
    298 	for(i = 0; i < data_len; i++) {
    299 		sum = 0;
    300 		for(j = 0; j < order; j++)
    301 			sum += qlp_coeff[j] * data[i-j-1];
    302 		residual[i] = data[i] - (sum >> lp_quantization);
    303 	}
    304 	*/
    305 }
    306 #else /* fully unrolled version for normal use */
    307 {
    308 	int i;
    309 	FLAC__int32 sum;
    310 
    311 	FLAC__ASSERT(order > 0);
    312 	FLAC__ASSERT(order <= 32);
    313 
    314 	/*
    315 	 * We do unique versions up to 12th order since that's the subset limit.
    316 	 * Also they are roughly ordered to match frequency of occurrence to
    317 	 * minimize branching.
    318 	 */
    319 	if(order <= 12) {
    320 		if(order > 8) {
    321 			if(order > 10) {
    322 				if(order == 12) {
    323 					for(i = 0; i < (int)data_len; i++) {
    324 						sum = 0;
    325 						sum += qlp_coeff[11] * data[i-12];
    326 						sum += qlp_coeff[10] * data[i-11];
    327 						sum += qlp_coeff[9] * data[i-10];
    328 						sum += qlp_coeff[8] * data[i-9];
    329 						sum += qlp_coeff[7] * data[i-8];
    330 						sum += qlp_coeff[6] * data[i-7];
    331 						sum += qlp_coeff[5] * data[i-6];
    332 						sum += qlp_coeff[4] * data[i-5];
    333 						sum += qlp_coeff[3] * data[i-4];
    334 						sum += qlp_coeff[2] * data[i-3];
    335 						sum += qlp_coeff[1] * data[i-2];
    336 						sum += qlp_coeff[0] * data[i-1];
    337 						residual[i] = data[i] - (sum >> lp_quantization);
    338 					}
    339 				}
    340 				else { /* order == 11 */
    341 					for(i = 0; i < (int)data_len; i++) {
    342 						sum = 0;
    343 						sum += qlp_coeff[10] * data[i-11];
    344 						sum += qlp_coeff[9] * data[i-10];
    345 						sum += qlp_coeff[8] * data[i-9];
    346 						sum += qlp_coeff[7] * data[i-8];
    347 						sum += qlp_coeff[6] * data[i-7];
    348 						sum += qlp_coeff[5] * data[i-6];
    349 						sum += qlp_coeff[4] * data[i-5];
    350 						sum += qlp_coeff[3] * data[i-4];
    351 						sum += qlp_coeff[2] * data[i-3];
    352 						sum += qlp_coeff[1] * data[i-2];
    353 						sum += qlp_coeff[0] * data[i-1];
    354 						residual[i] = data[i] - (sum >> lp_quantization);
    355 					}
    356 				}
    357 			}
    358 			else {
    359 				if(order == 10) {
    360 					for(i = 0; i < (int)data_len; i++) {
    361 						sum = 0;
    362 						sum += qlp_coeff[9] * data[i-10];
    363 						sum += qlp_coeff[8] * data[i-9];
    364 						sum += qlp_coeff[7] * data[i-8];
    365 						sum += qlp_coeff[6] * data[i-7];
    366 						sum += qlp_coeff[5] * data[i-6];
    367 						sum += qlp_coeff[4] * data[i-5];
    368 						sum += qlp_coeff[3] * data[i-4];
    369 						sum += qlp_coeff[2] * data[i-3];
    370 						sum += qlp_coeff[1] * data[i-2];
    371 						sum += qlp_coeff[0] * data[i-1];
    372 						residual[i] = data[i] - (sum >> lp_quantization);
    373 					}
    374 				}
    375 				else { /* order == 9 */
    376 					for(i = 0; i < (int)data_len; i++) {
    377 						sum = 0;
    378 						sum += qlp_coeff[8] * data[i-9];
    379 						sum += qlp_coeff[7] * data[i-8];
    380 						sum += qlp_coeff[6] * data[i-7];
    381 						sum += qlp_coeff[5] * data[i-6];
    382 						sum += qlp_coeff[4] * data[i-5];
    383 						sum += qlp_coeff[3] * data[i-4];
    384 						sum += qlp_coeff[2] * data[i-3];
    385 						sum += qlp_coeff[1] * data[i-2];
    386 						sum += qlp_coeff[0] * data[i-1];
    387 						residual[i] = data[i] - (sum >> lp_quantization);
    388 					}
    389 				}
    390 			}
    391 		}
    392 		else if(order > 4) {
    393 			if(order > 6) {
    394 				if(order == 8) {
    395 					for(i = 0; i < (int)data_len; i++) {
    396 						sum = 0;
    397 						sum += qlp_coeff[7] * data[i-8];
    398 						sum += qlp_coeff[6] * data[i-7];
    399 						sum += qlp_coeff[5] * data[i-6];
    400 						sum += qlp_coeff[4] * data[i-5];
    401 						sum += qlp_coeff[3] * data[i-4];
    402 						sum += qlp_coeff[2] * data[i-3];
    403 						sum += qlp_coeff[1] * data[i-2];
    404 						sum += qlp_coeff[0] * data[i-1];
    405 						residual[i] = data[i] - (sum >> lp_quantization);
    406 					}
    407 				}
    408 				else { /* order == 7 */
    409 					for(i = 0; i < (int)data_len; i++) {
    410 						sum = 0;
    411 						sum += qlp_coeff[6] * data[i-7];
    412 						sum += qlp_coeff[5] * data[i-6];
    413 						sum += qlp_coeff[4] * data[i-5];
    414 						sum += qlp_coeff[3] * data[i-4];
    415 						sum += qlp_coeff[2] * data[i-3];
    416 						sum += qlp_coeff[1] * data[i-2];
    417 						sum += qlp_coeff[0] * data[i-1];
    418 						residual[i] = data[i] - (sum >> lp_quantization);
    419 					}
    420 				}
    421 			}
    422 			else {
    423 				if(order == 6) {
    424 					for(i = 0; i < (int)data_len; i++) {
    425 						sum = 0;
    426 						sum += qlp_coeff[5] * data[i-6];
    427 						sum += qlp_coeff[4] * data[i-5];
    428 						sum += qlp_coeff[3] * data[i-4];
    429 						sum += qlp_coeff[2] * data[i-3];
    430 						sum += qlp_coeff[1] * data[i-2];
    431 						sum += qlp_coeff[0] * data[i-1];
    432 						residual[i] = data[i] - (sum >> lp_quantization);
    433 					}
    434 				}
    435 				else { /* order == 5 */
    436 					for(i = 0; i < (int)data_len; i++) {
    437 						sum = 0;
    438 						sum += qlp_coeff[4] * data[i-5];
    439 						sum += qlp_coeff[3] * data[i-4];
    440 						sum += qlp_coeff[2] * data[i-3];
    441 						sum += qlp_coeff[1] * data[i-2];
    442 						sum += qlp_coeff[0] * data[i-1];
    443 						residual[i] = data[i] - (sum >> lp_quantization);
    444 					}
    445 				}
    446 			}
    447 		}
    448 		else {
    449 			if(order > 2) {
    450 				if(order == 4) {
    451 					for(i = 0; i < (int)data_len; i++) {
    452 						sum = 0;
    453 						sum += qlp_coeff[3] * data[i-4];
    454 						sum += qlp_coeff[2] * data[i-3];
    455 						sum += qlp_coeff[1] * data[i-2];
    456 						sum += qlp_coeff[0] * data[i-1];
    457 						residual[i] = data[i] - (sum >> lp_quantization);
    458 					}
    459 				}
    460 				else { /* order == 3 */
    461 					for(i = 0; i < (int)data_len; i++) {
    462 						sum = 0;
    463 						sum += qlp_coeff[2] * data[i-3];
    464 						sum += qlp_coeff[1] * data[i-2];
    465 						sum += qlp_coeff[0] * data[i-1];
    466 						residual[i] = data[i] - (sum >> lp_quantization);
    467 					}
    468 				}
    469 			}
    470 			else {
    471 				if(order == 2) {
    472 					for(i = 0; i < (int)data_len; i++) {
    473 						sum = 0;
    474 						sum += qlp_coeff[1] * data[i-2];
    475 						sum += qlp_coeff[0] * data[i-1];
    476 						residual[i] = data[i] - (sum >> lp_quantization);
    477 					}
    478 				}
    479 				else { /* order == 1 */
    480 					for(i = 0; i < (int)data_len; i++)
    481 						residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
    482 				}
    483 			}
    484 		}
    485 	}
    486 	else { /* order > 12 */
    487 		for(i = 0; i < (int)data_len; i++) {
    488 			sum = 0;
    489 			switch(order) {
    490 				case 32: sum += qlp_coeff[31] * data[i-32];
    491 				case 31: sum += qlp_coeff[30] * data[i-31];
    492 				case 30: sum += qlp_coeff[29] * data[i-30];
    493 				case 29: sum += qlp_coeff[28] * data[i-29];
    494 				case 28: sum += qlp_coeff[27] * data[i-28];
    495 				case 27: sum += qlp_coeff[26] * data[i-27];
    496 				case 26: sum += qlp_coeff[25] * data[i-26];
    497 				case 25: sum += qlp_coeff[24] * data[i-25];
    498 				case 24: sum += qlp_coeff[23] * data[i-24];
    499 				case 23: sum += qlp_coeff[22] * data[i-23];
    500 				case 22: sum += qlp_coeff[21] * data[i-22];
    501 				case 21: sum += qlp_coeff[20] * data[i-21];
    502 				case 20: sum += qlp_coeff[19] * data[i-20];
    503 				case 19: sum += qlp_coeff[18] * data[i-19];
    504 				case 18: sum += qlp_coeff[17] * data[i-18];
    505 				case 17: sum += qlp_coeff[16] * data[i-17];
    506 				case 16: sum += qlp_coeff[15] * data[i-16];
    507 				case 15: sum += qlp_coeff[14] * data[i-15];
    508 				case 14: sum += qlp_coeff[13] * data[i-14];
    509 				case 13: sum += qlp_coeff[12] * data[i-13];
    510 				         sum += qlp_coeff[11] * data[i-12];
    511 				         sum += qlp_coeff[10] * data[i-11];
    512 				         sum += qlp_coeff[ 9] * data[i-10];
    513 				         sum += qlp_coeff[ 8] * data[i- 9];
    514 				         sum += qlp_coeff[ 7] * data[i- 8];
    515 				         sum += qlp_coeff[ 6] * data[i- 7];
    516 				         sum += qlp_coeff[ 5] * data[i- 6];
    517 				         sum += qlp_coeff[ 4] * data[i- 5];
    518 				         sum += qlp_coeff[ 3] * data[i- 4];
    519 				         sum += qlp_coeff[ 2] * data[i- 3];
    520 				         sum += qlp_coeff[ 1] * data[i- 2];
    521 				         sum += qlp_coeff[ 0] * data[i- 1];
    522 			}
    523 			residual[i] = data[i] - (sum >> lp_quantization);
    524 		}
    525 	}
    526 }
    527 #endif
    528 
    529 void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
    530 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    531 {
    532 	unsigned i, j;
    533 	FLAC__int64 sum;
    534 	const FLAC__int32 *history;
    535 
    536 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    537 	fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    538 	for(i=0;i<order;i++)
    539 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    540 	fprintf(stderr,"\n");
    541 #endif
    542 	FLAC__ASSERT(order > 0);
    543 
    544 	for(i = 0; i < data_len; i++) {
    545 		sum = 0;
    546 		history = data;
    547 		for(j = 0; j < order; j++)
    548 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
    549 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
    550 			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
    551 			break;
    552 		}
    553 		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
    554 			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%" PRId64 ", residual=%" PRId64 "\n", i, *data, (int64_t)(sum >> lp_quantization), ((FLAC__int64)(*data) - (sum >> lp_quantization)));
    555 			break;
    556 		}
    557 		*(residual++) = *(data++) - (FLAC__int32)(sum >> lp_quantization);
    558 	}
    559 }
    560 #else /* fully unrolled version for normal use */
    561 {
    562 	int i;
    563 	FLAC__int64 sum;
    564 
    565 	FLAC__ASSERT(order > 0);
    566 	FLAC__ASSERT(order <= 32);
    567 
    568 	/*
    569 	 * We do unique versions up to 12th order since that's the subset limit.
    570 	 * Also they are roughly ordered to match frequency of occurrence to
    571 	 * minimize branching.
    572 	 */
    573 	if(order <= 12) {
    574 		if(order > 8) {
    575 			if(order > 10) {
    576 				if(order == 12) {
    577 					for(i = 0; i < (int)data_len; i++) {
    578 						sum = 0;
    579 						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
    580 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    581 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    582 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    583 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    584 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    585 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    586 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    587 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    588 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    589 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    590 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    591 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    592 					}
    593 				}
    594 				else { /* order == 11 */
    595 					for(i = 0; i < (int)data_len; i++) {
    596 						sum = 0;
    597 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    598 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    599 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    600 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    601 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    602 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    603 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    604 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    605 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    606 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    607 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    608 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    609 					}
    610 				}
    611 			}
    612 			else {
    613 				if(order == 10) {
    614 					for(i = 0; i < (int)data_len; i++) {
    615 						sum = 0;
    616 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
    617 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    618 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    619 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    620 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    621 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    622 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    623 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    624 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    625 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    626 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    627 					}
    628 				}
    629 				else { /* order == 9 */
    630 					for(i = 0; i < (int)data_len; i++) {
    631 						sum = 0;
    632 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
    633 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    634 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    635 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    636 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    637 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    638 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    639 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    640 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    641 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    642 					}
    643 				}
    644 			}
    645 		}
    646 		else if(order > 4) {
    647 			if(order > 6) {
    648 				if(order == 8) {
    649 					for(i = 0; i < (int)data_len; i++) {
    650 						sum = 0;
    651 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
    652 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    653 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    654 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    655 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    656 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    657 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    658 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    659 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    660 					}
    661 				}
    662 				else { /* order == 7 */
    663 					for(i = 0; i < (int)data_len; i++) {
    664 						sum = 0;
    665 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
    666 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    667 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    668 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    669 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    670 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    671 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    672 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    673 					}
    674 				}
    675 			}
    676 			else {
    677 				if(order == 6) {
    678 					for(i = 0; i < (int)data_len; i++) {
    679 						sum = 0;
    680 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
    681 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    682 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    683 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    684 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    685 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    686 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    687 					}
    688 				}
    689 				else { /* order == 5 */
    690 					for(i = 0; i < (int)data_len; i++) {
    691 						sum = 0;
    692 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
    693 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    694 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    695 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    696 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    697 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    698 					}
    699 				}
    700 			}
    701 		}
    702 		else {
    703 			if(order > 2) {
    704 				if(order == 4) {
    705 					for(i = 0; i < (int)data_len; i++) {
    706 						sum = 0;
    707 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
    708 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    709 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    710 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    711 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    712 					}
    713 				}
    714 				else { /* order == 3 */
    715 					for(i = 0; i < (int)data_len; i++) {
    716 						sum = 0;
    717 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
    718 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    719 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    720 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    721 					}
    722 				}
    723 			}
    724 			else {
    725 				if(order == 2) {
    726 					for(i = 0; i < (int)data_len; i++) {
    727 						sum = 0;
    728 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
    729 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
    730 						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    731 					}
    732 				}
    733 				else { /* order == 1 */
    734 					for(i = 0; i < (int)data_len; i++)
    735 						residual[i] = data[i] - (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
    736 				}
    737 			}
    738 		}
    739 	}
    740 	else { /* order > 12 */
    741 		for(i = 0; i < (int)data_len; i++) {
    742 			sum = 0;
    743 			switch(order) {
    744 				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
    745 				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
    746 				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
    747 				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
    748 				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
    749 				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
    750 				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
    751 				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
    752 				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
    753 				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
    754 				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
    755 				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
    756 				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
    757 				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
    758 				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
    759 				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
    760 				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
    761 				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
    762 				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
    763 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
    764 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
    765 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
    766 				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
    767 				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
    768 				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
    769 				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
    770 				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
    771 				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
    772 				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
    773 				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
    774 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
    775 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
    776 			}
    777 			residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
    778 		}
    779 	}
    780 }
    781 #endif
    782 
    783 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
    784 
    785 void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
    786 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
    787 {
    788 	FLAC__int64 sumo;
    789 	unsigned i, j;
    790 	FLAC__int32 sum;
    791 	const FLAC__int32 *r = residual, *history;
    792 
    793 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
    794 	fprintf(stderr,"FLAC__lpc_restore_signal: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
    795 	for(i=0;i<order;i++)
    796 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
    797 	fprintf(stderr,"\n");
    798 #endif
    799 	FLAC__ASSERT(order > 0);
    800 
    801 	for(i = 0; i < data_len; i++) {
    802 		sumo = 0;
    803 		sum = 0;
    804 		history = data;
    805 		for(j = 0; j < order; j++) {
    806 			sum += qlp_coeff[j] * (*(--history));
    807 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
    808 			if(sumo > 2147483647ll || sumo < -2147483648ll)
    809 				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
    810 		}
    811 		*(data++) = *(r++) + (sum >> lp_quantization);
    812 	}
    813 
    814 	/* Here's a slower but clearer version:
    815 	for(i = 0; i < data_len; i++) {
    816 		sum = 0;
    817 		for(j = 0; j < order; j++)
    818 			sum += qlp_coeff[j] * data[i-j-1];
    819 		data[i] = residual[i] + (sum >> lp_quantization);
    820 	}
    821 	*/
    822 }
    823 #else /* fully unrolled version for normal use */
    824 {
    825 	int i;
    826 	FLAC__int32 sum;
    827 
    828 	FLAC__ASSERT(order > 0);
    829 	FLAC__ASSERT(order <= 32);
    830 
    831 	/*
    832 	 * We do unique versions up to 12th order since that's the subset limit.
    833 	 * Also they are roughly ordered to match frequency of occurrence to
    834 	 * minimize branching.
    835 	 */
    836 	if(order <= 12) {
    837 		if(order > 8) {
    838 			if(order > 10) {
    839 				if(order == 12) {
    840 					for(i = 0; i < (int)data_len; i++) {
    841 						sum = 0;
    842 						sum += qlp_coeff[11] * data[i-12];
    843 						sum += qlp_coeff[10] * data[i-11];
    844 						sum += qlp_coeff[9] * data[i-10];
    845 						sum += qlp_coeff[8] * data[i-9];
    846 						sum += qlp_coeff[7] * data[i-8];
    847 						sum += qlp_coeff[6] * data[i-7];
    848 						sum += qlp_coeff[5] * data[i-6];
    849 						sum += qlp_coeff[4] * data[i-5];
    850 						sum += qlp_coeff[3] * data[i-4];
    851 						sum += qlp_coeff[2] * data[i-3];
    852 						sum += qlp_coeff[1] * data[i-2];
    853 						sum += qlp_coeff[0] * data[i-1];
    854 						data[i] = residual[i] + (sum >> lp_quantization);
    855 					}
    856 				}
    857 				else { /* order == 11 */
    858 					for(i = 0; i < (int)data_len; i++) {
    859 						sum = 0;
    860 						sum += qlp_coeff[10] * data[i-11];
    861 						sum += qlp_coeff[9] * data[i-10];
    862 						sum += qlp_coeff[8] * data[i-9];
    863 						sum += qlp_coeff[7] * data[i-8];
    864 						sum += qlp_coeff[6] * data[i-7];
    865 						sum += qlp_coeff[5] * data[i-6];
    866 						sum += qlp_coeff[4] * data[i-5];
    867 						sum += qlp_coeff[3] * data[i-4];
    868 						sum += qlp_coeff[2] * data[i-3];
    869 						sum += qlp_coeff[1] * data[i-2];
    870 						sum += qlp_coeff[0] * data[i-1];
    871 						data[i] = residual[i] + (sum >> lp_quantization);
    872 					}
    873 				}
    874 			}
    875 			else {
    876 				if(order == 10) {
    877 					for(i = 0; i < (int)data_len; i++) {
    878 						sum = 0;
    879 						sum += qlp_coeff[9] * data[i-10];
    880 						sum += qlp_coeff[8] * data[i-9];
    881 						sum += qlp_coeff[7] * data[i-8];
    882 						sum += qlp_coeff[6] * data[i-7];
    883 						sum += qlp_coeff[5] * data[i-6];
    884 						sum += qlp_coeff[4] * data[i-5];
    885 						sum += qlp_coeff[3] * data[i-4];
    886 						sum += qlp_coeff[2] * data[i-3];
    887 						sum += qlp_coeff[1] * data[i-2];
    888 						sum += qlp_coeff[0] * data[i-1];
    889 						data[i] = residual[i] + (sum >> lp_quantization);
    890 					}
    891 				}
    892 				else { /* order == 9 */
    893 					for(i = 0; i < (int)data_len; i++) {
    894 						sum = 0;
    895 						sum += qlp_coeff[8] * data[i-9];
    896 						sum += qlp_coeff[7] * data[i-8];
    897 						sum += qlp_coeff[6] * data[i-7];
    898 						sum += qlp_coeff[5] * data[i-6];
    899 						sum += qlp_coeff[4] * data[i-5];
    900 						sum += qlp_coeff[3] * data[i-4];
    901 						sum += qlp_coeff[2] * data[i-3];
    902 						sum += qlp_coeff[1] * data[i-2];
    903 						sum += qlp_coeff[0] * data[i-1];
    904 						data[i] = residual[i] + (sum >> lp_quantization);
    905 					}
    906 				}
    907 			}
    908 		}
    909 		else if(order > 4) {
    910 			if(order > 6) {
    911 				if(order == 8) {
    912 					for(i = 0; i < (int)data_len; i++) {
    913 						sum = 0;
    914 						sum += qlp_coeff[7] * data[i-8];
    915 						sum += qlp_coeff[6] * data[i-7];
    916 						sum += qlp_coeff[5] * data[i-6];
    917 						sum += qlp_coeff[4] * data[i-5];
    918 						sum += qlp_coeff[3] * data[i-4];
    919 						sum += qlp_coeff[2] * data[i-3];
    920 						sum += qlp_coeff[1] * data[i-2];
    921 						sum += qlp_coeff[0] * data[i-1];
    922 						data[i] = residual[i] + (sum >> lp_quantization);
    923 					}
    924 				}
    925 				else { /* order == 7 */
    926 					for(i = 0; i < (int)data_len; i++) {
    927 						sum = 0;
    928 						sum += qlp_coeff[6] * data[i-7];
    929 						sum += qlp_coeff[5] * data[i-6];
    930 						sum += qlp_coeff[4] * data[i-5];
    931 						sum += qlp_coeff[3] * data[i-4];
    932 						sum += qlp_coeff[2] * data[i-3];
    933 						sum += qlp_coeff[1] * data[i-2];
    934 						sum += qlp_coeff[0] * data[i-1];
    935 						data[i] = residual[i] + (sum >> lp_quantization);
    936 					}
    937 				}
    938 			}
    939 			else {
    940 				if(order == 6) {
    941 					for(i = 0; i < (int)data_len; i++) {
    942 						sum = 0;
    943 						sum += qlp_coeff[5] * data[i-6];
    944 						sum += qlp_coeff[4] * data[i-5];
    945 						sum += qlp_coeff[3] * data[i-4];
    946 						sum += qlp_coeff[2] * data[i-3];
    947 						sum += qlp_coeff[1] * data[i-2];
    948 						sum += qlp_coeff[0] * data[i-1];
    949 						data[i] = residual[i] + (sum >> lp_quantization);
    950 					}
    951 				}
    952 				else { /* order == 5 */
    953 					for(i = 0; i < (int)data_len; i++) {
    954 						sum = 0;
    955 						sum += qlp_coeff[4] * data[i-5];
    956 						sum += qlp_coeff[3] * data[i-4];
    957 						sum += qlp_coeff[2] * data[i-3];
    958 						sum += qlp_coeff[1] * data[i-2];
    959 						sum += qlp_coeff[0] * data[i-1];
    960 						data[i] = residual[i] + (sum >> lp_quantization);
    961 					}
    962 				}
    963 			}
    964 		}
    965 		else {
    966 			if(order > 2) {
    967 				if(order == 4) {
    968 					for(i = 0; i < (int)data_len; i++) {
    969 						sum = 0;
    970 						sum += qlp_coeff[3] * data[i-4];
    971 						sum += qlp_coeff[2] * data[i-3];
    972 						sum += qlp_coeff[1] * data[i-2];
    973 						sum += qlp_coeff[0] * data[i-1];
    974 						data[i] = residual[i] + (sum >> lp_quantization);
    975 					}
    976 				}
    977 				else { /* order == 3 */
    978 					for(i = 0; i < (int)data_len; i++) {
    979 						sum = 0;
    980 						sum += qlp_coeff[2] * data[i-3];
    981 						sum += qlp_coeff[1] * data[i-2];
    982 						sum += qlp_coeff[0] * data[i-1];
    983 						data[i] = residual[i] + (sum >> lp_quantization);
    984 					}
    985 				}
    986 			}
    987 			else {
    988 				if(order == 2) {
    989 					for(i = 0; i < (int)data_len; i++) {
    990 						sum = 0;
    991 						sum += qlp_coeff[1] * data[i-2];
    992 						sum += qlp_coeff[0] * data[i-1];
    993 						data[i] = residual[i] + (sum >> lp_quantization);
    994 					}
    995 				}
    996 				else { /* order == 1 */
    997 					for(i = 0; i < (int)data_len; i++)
    998 						data[i] = residual[i] + ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
    999 				}
   1000 			}
   1001 		}
   1002 	}
   1003 	else { /* order > 12 */
   1004 		for(i = 0; i < (int)data_len; i++) {
   1005 			sum = 0;
   1006 			switch(order) {
   1007 				case 32: sum += qlp_coeff[31] * data[i-32];
   1008 				case 31: sum += qlp_coeff[30] * data[i-31];
   1009 				case 30: sum += qlp_coeff[29] * data[i-30];
   1010 				case 29: sum += qlp_coeff[28] * data[i-29];
   1011 				case 28: sum += qlp_coeff[27] * data[i-28];
   1012 				case 27: sum += qlp_coeff[26] * data[i-27];
   1013 				case 26: sum += qlp_coeff[25] * data[i-26];
   1014 				case 25: sum += qlp_coeff[24] * data[i-25];
   1015 				case 24: sum += qlp_coeff[23] * data[i-24];
   1016 				case 23: sum += qlp_coeff[22] * data[i-23];
   1017 				case 22: sum += qlp_coeff[21] * data[i-22];
   1018 				case 21: sum += qlp_coeff[20] * data[i-21];
   1019 				case 20: sum += qlp_coeff[19] * data[i-20];
   1020 				case 19: sum += qlp_coeff[18] * data[i-19];
   1021 				case 18: sum += qlp_coeff[17] * data[i-18];
   1022 				case 17: sum += qlp_coeff[16] * data[i-17];
   1023 				case 16: sum += qlp_coeff[15] * data[i-16];
   1024 				case 15: sum += qlp_coeff[14] * data[i-15];
   1025 				case 14: sum += qlp_coeff[13] * data[i-14];
   1026 				case 13: sum += qlp_coeff[12] * data[i-13];
   1027 				         sum += qlp_coeff[11] * data[i-12];
   1028 				         sum += qlp_coeff[10] * data[i-11];
   1029 				         sum += qlp_coeff[ 9] * data[i-10];
   1030 				         sum += qlp_coeff[ 8] * data[i- 9];
   1031 				         sum += qlp_coeff[ 7] * data[i- 8];
   1032 				         sum += qlp_coeff[ 6] * data[i- 7];
   1033 				         sum += qlp_coeff[ 5] * data[i- 6];
   1034 				         sum += qlp_coeff[ 4] * data[i- 5];
   1035 				         sum += qlp_coeff[ 3] * data[i- 4];
   1036 				         sum += qlp_coeff[ 2] * data[i- 3];
   1037 				         sum += qlp_coeff[ 1] * data[i- 2];
   1038 				         sum += qlp_coeff[ 0] * data[i- 1];
   1039 			}
   1040 			data[i] = residual[i] + (sum >> lp_quantization);
   1041 		}
   1042 	}
   1043 }
   1044 #endif
   1045 
   1046 void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
   1047 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
   1048 {
   1049 	unsigned i, j;
   1050 	FLAC__int64 sum;
   1051 	const FLAC__int32 *r = residual, *history;
   1052 
   1053 #ifdef FLAC__OVERFLOW_DETECT_VERBOSE
   1054 	fprintf(stderr,"FLAC__lpc_restore_signal_wide: data_len=%d, order=%u, lpq=%d",data_len,order,lp_quantization);
   1055 	for(i=0;i<order;i++)
   1056 		fprintf(stderr,", q[%u]=%d",i,qlp_coeff[i]);
   1057 	fprintf(stderr,"\n");
   1058 #endif
   1059 	FLAC__ASSERT(order > 0);
   1060 
   1061 	for(i = 0; i < data_len; i++) {
   1062 		sum = 0;
   1063 		history = data;
   1064 		for(j = 0; j < order; j++)
   1065 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
   1066 		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
   1067 			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
   1068 			break;
   1069 		}
   1070 		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
   1071 			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization)));
   1072 			break;
   1073 		}
   1074 		*(data++) = *(r++) + (FLAC__int32)(sum >> lp_quantization);
   1075 	}
   1076 }
   1077 #else /* fully unrolled version for normal use */
   1078 {
   1079 	int i;
   1080 	FLAC__int64 sum;
   1081 
   1082 	FLAC__ASSERT(order > 0);
   1083 	FLAC__ASSERT(order <= 32);
   1084 
   1085 	/*
   1086 	 * We do unique versions up to 12th order since that's the subset limit.
   1087 	 * Also they are roughly ordered to match frequency of occurrence to
   1088 	 * minimize branching.
   1089 	 */
   1090 	if(order <= 12) {
   1091 		if(order > 8) {
   1092 			if(order > 10) {
   1093 				if(order == 12) {
   1094 					for(i = 0; i < (int)data_len; i++) {
   1095 						sum = 0;
   1096 						sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
   1097 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1098 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1099 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1100 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1101 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1102 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1103 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1104 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1105 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1106 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1107 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1108 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1109 					}
   1110 				}
   1111 				else { /* order == 11 */
   1112 					for(i = 0; i < (int)data_len; i++) {
   1113 						sum = 0;
   1114 						sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1115 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1116 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1117 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1118 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1119 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1120 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1121 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1122 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1123 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1124 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1125 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1126 					}
   1127 				}
   1128 			}
   1129 			else {
   1130 				if(order == 10) {
   1131 					for(i = 0; i < (int)data_len; i++) {
   1132 						sum = 0;
   1133 						sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
   1134 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1135 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1136 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1137 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1138 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1139 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1140 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1141 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1142 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1143 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1144 					}
   1145 				}
   1146 				else { /* order == 9 */
   1147 					for(i = 0; i < (int)data_len; i++) {
   1148 						sum = 0;
   1149 						sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
   1150 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1151 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1152 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1153 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1154 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1155 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1156 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1157 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1158 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1159 					}
   1160 				}
   1161 			}
   1162 		}
   1163 		else if(order > 4) {
   1164 			if(order > 6) {
   1165 				if(order == 8) {
   1166 					for(i = 0; i < (int)data_len; i++) {
   1167 						sum = 0;
   1168 						sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
   1169 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1170 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1171 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1172 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1173 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1174 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1175 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1176 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1177 					}
   1178 				}
   1179 				else { /* order == 7 */
   1180 					for(i = 0; i < (int)data_len; i++) {
   1181 						sum = 0;
   1182 						sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
   1183 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1184 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1185 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1186 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1187 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1188 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1189 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1190 					}
   1191 				}
   1192 			}
   1193 			else {
   1194 				if(order == 6) {
   1195 					for(i = 0; i < (int)data_len; i++) {
   1196 						sum = 0;
   1197 						sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
   1198 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1199 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1200 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1201 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1202 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1203 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1204 					}
   1205 				}
   1206 				else { /* order == 5 */
   1207 					for(i = 0; i < (int)data_len; i++) {
   1208 						sum = 0;
   1209 						sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
   1210 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1211 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1212 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1213 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1214 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1215 					}
   1216 				}
   1217 			}
   1218 		}
   1219 		else {
   1220 			if(order > 2) {
   1221 				if(order == 4) {
   1222 					for(i = 0; i < (int)data_len; i++) {
   1223 						sum = 0;
   1224 						sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
   1225 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1226 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1227 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1228 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1229 					}
   1230 				}
   1231 				else { /* order == 3 */
   1232 					for(i = 0; i < (int)data_len; i++) {
   1233 						sum = 0;
   1234 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
   1235 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1236 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1237 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1238 					}
   1239 				}
   1240 			}
   1241 			else {
   1242 				if(order == 2) {
   1243 					for(i = 0; i < (int)data_len; i++) {
   1244 						sum = 0;
   1245 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
   1246 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
   1247 						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1248 					}
   1249 				}
   1250 				else { /* order == 1 */
   1251 					for(i = 0; i < (int)data_len; i++)
   1252 						data[i] = residual[i] + (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
   1253 				}
   1254 			}
   1255 		}
   1256 	}
   1257 	else { /* order > 12 */
   1258 		for(i = 0; i < (int)data_len; i++) {
   1259 			sum = 0;
   1260 			switch(order) {
   1261 				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
   1262 				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
   1263 				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
   1264 				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
   1265 				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
   1266 				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
   1267 				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
   1268 				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
   1269 				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
   1270 				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
   1271 				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
   1272 				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
   1273 				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
   1274 				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
   1275 				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
   1276 				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
   1277 				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
   1278 				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
   1279 				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
   1280 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
   1281 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
   1282 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
   1283 				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
   1284 				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
   1285 				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
   1286 				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
   1287 				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
   1288 				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
   1289 				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
   1290 				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
   1291 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
   1292 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
   1293 			}
   1294 			data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
   1295 		}
   1296 	}
   1297 }
   1298 #endif
   1299 
   1300 #if defined(_MSC_VER)
   1301 #pragma warning ( default : 4028 )
   1302 #endif
   1303 
   1304 #ifndef FLAC__INTEGER_ONLY_LIBRARY
   1305 
   1306 FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples)
   1307 {
   1308 	FLAC__double error_scale;
   1309 
   1310 	FLAC__ASSERT(total_samples > 0);
   1311 
   1312 	error_scale = 0.5 / (FLAC__double)total_samples;
   1313 
   1314 	return FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error, error_scale);
   1315 }
   1316 
   1317 FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(FLAC__double lpc_error, FLAC__double error_scale)
   1318 {
   1319 	if(lpc_error > 0.0) {
   1320 		FLAC__double bps = (FLAC__double)0.5 * log(error_scale * lpc_error) / M_LN2;
   1321 		if(bps >= 0.0)
   1322 			return bps;
   1323 		else
   1324 			return 0.0;
   1325 	}
   1326 	else if(lpc_error < 0.0) { /* error should not be negative but can happen due to inadequate floating-point resolution */
   1327 		return 1e32;
   1328 	}
   1329 	else {
   1330 		return 0.0;
   1331 	}
   1332 }
   1333 
   1334 unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order)
   1335 {
   1336 	unsigned order, indx, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
   1337 	FLAC__double bits, best_bits, error_scale;
   1338 
   1339 	FLAC__ASSERT(max_order > 0);
   1340 	FLAC__ASSERT(total_samples > 0);
   1341 
   1342 	error_scale = 0.5 / (FLAC__double)total_samples;
   1343 
   1344 	best_index = 0;
   1345 	best_bits = (unsigned)(-1);
   1346 
   1347 	for(indx = 0, order = 1; indx < max_order; indx++, order++) {
   1348 		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order);
   1349 		if(bits < best_bits) {
   1350 			best_index = indx;
   1351 			best_bits = bits;
   1352 		}
   1353 	}
   1354 
   1355 	return best_index+1; /* +1 since indx of lpc_error[] is order-1 */
   1356 }
   1357 
   1358 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
   1359