Home | History | Annotate | Download | only in libFLAC

Lines Matching refs:qlp_coeff

57 void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
71 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
72 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
73 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
74 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
75 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
76 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
77 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
78 q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0));
79 q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0));
80 q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0));
81 q10 = _mm_cvtsi32_si128(0xffff & qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0));
82 q11 = _mm_cvtsi32_si128(0xffff & qlp_coeff[11]); q11 = _mm_shuffle_epi32(q11, _MM_SHUFFLE(0,0,0,0));
104 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
105 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
106 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
107 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
108 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
109 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
110 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
111 q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0));
112 q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0));
113 q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0));
114 q10 = _mm_cvtsi32_si128(0xffff & qlp_coeff[10]); q10 = _mm_shuffle_epi32(q10, _MM_SHUFFLE(0,0,0,0));
137 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
138 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
139 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
140 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
141 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
142 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
143 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
144 q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0));
145 q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0));
146 q9 = _mm_cvtsi32_si128(0xffff & qlp_coeff[9]); q9 = _mm_shuffle_epi32(q9, _MM_SHUFFLE(0,0,0,0));
166 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
167 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
168 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
169 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
170 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
171 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
172 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
173 q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0));
174 q8 = _mm_cvtsi32_si128(0xffff & qlp_coeff[8]); q8 = _mm_shuffle_epi32(q8, _MM_SHUFFLE(0,0,0,0));
197 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
198 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
199 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
200 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
201 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
202 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
203 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
204 q7 = _mm_cvtsi32_si128(0xffff & qlp_coeff[7]); q7 = _mm_shuffle_epi32(q7, _MM_SHUFFLE(0,0,0,0));
222 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
223 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
224 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
225 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
226 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
227 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
228 q6 = _mm_cvtsi32_si128(0xffff & qlp_coeff[6]); q6 = _mm_shuffle_epi32(q6, _MM_SHUFFLE(0,0,0,0));
247 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
248 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
249 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
250 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
251 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
252 q5 = _mm_cvtsi32_si128(0xffff & qlp_coeff[5]); q5 = _mm_shuffle_epi32(q5, _MM_SHUFFLE(0,0,0,0));
268 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
269 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
270 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
271 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
272 q4 = _mm_cvtsi32_si128(0xffff & qlp_coeff[4]); q4 = _mm_shuffle_epi32(q4, _MM_SHUFFLE(0,0,0,0));
291 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
292 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
293 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
294 q3 = _mm_cvtsi32_si128(0xffff & qlp_coeff[3]); q3 = _mm_shuffle_epi32(q3, _MM_SHUFFLE(0,0,0,0));
308 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
309 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
310 q2 = _mm_cvtsi32_si128(0xffff & qlp_coeff[2]); q2 = _mm_shuffle_epi32(q2, _MM_SHUFFLE(0,0,0,0));
325 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
326 q1 = _mm_cvtsi32_si128(0xffff & qlp_coeff[1]); q1 = _mm_shuffle_epi32(q1, _MM_SHUFFLE(0,0,0,0));
338 q0 = _mm_cvtsi32_si128(0xffff & qlp_coeff[0]); q0 = _mm_shuffle_epi32(q0, _MM_SHUFFLE(0,0,0,0));
352 case 12: sum += qlp_coeff[11] * data[i-12];
353 case 11: sum += qlp_coeff[10] * data[i-11];
354 case 10: sum += qlp_coeff[ 9] * data[i-10];
355 case 9: sum += qlp_coeff[ 8] * data[i- 9];
356 case 8: sum += qlp_coeff[ 7] * data[i- 8];
357 case 7: sum += qlp_coeff[ 6] * data[i- 7];
358 case 6: sum += qlp_coeff[ 5] * data[i- 6];
359 case 5: sum += qlp_coeff[ 4] * data[i- 5];
360 case 4: sum += qlp_coeff[ 3] * data[i- 4];
361 case 3: sum += qlp_coeff[ 2] * data[i- 3];
362 case 2: sum += qlp_coeff[ 1] * data[i- 2];
363 case 1: sum += qlp_coeff[ 0] * data[i- 1];
372 case 32: sum += qlp_coeff[31] * data[i-32];
373 case 31: sum += qlp_coeff[30] * data[i-31];
374 case 30: sum += qlp_coeff[29] * data[i-30];
375 case 29: sum += qlp_coeff[28] * data[i-29];
376 case 28: sum += qlp_coeff[27] * data[i-28];
377 case 27: sum += qlp_coeff[26] * data[i-27];
378 case 26: sum += qlp_coeff[25] * data[i-26];
379 case 25: sum += qlp_coeff[24] * data[i-25];
380 case 24: sum += qlp_coeff[23] * data[i-24];
381 case 23: sum += qlp_coeff[22] * data[i-23];
382 case 22: sum += qlp_coeff[21] * data[i-22];
383 case 21: sum += qlp_coeff[20] * data[i-21];
384 case 20: sum += qlp_coeff[19] * data[i-20];
385 case 19: sum += qlp_coeff[18] * data[i-19];
386 case 18: sum += qlp_coeff[17] * data[i-18];
387 case 17: sum += qlp_coeff[16] * data[i-17];
388 case 16: sum += qlp_coeff[15] * data[i-16];
389 case 15: sum += qlp_coeff[14] * data[i-15];
390 case 14: sum += qlp_coeff[13] * data[i-14];
391 case 13: sum += qlp_coeff[12] * data[i-13];
392 sum += qlp_coeff[11] * data[i-12];
393 sum += qlp_coeff[10] * data[i-11];
394 sum += qlp_coeff[ 9] * data[i-10];
395 sum += qlp_coeff[ 8] * data[i- 9];
396 sum += qlp_coeff[ 7] * data[i- 8];
397 sum += qlp_coeff[ 6] * data[i- 7];
398 sum += qlp_coeff[ 5] * data[i- 6];
399 sum += qlp_coeff[ 4] * data[i- 5];
400 sum += qlp_coeff[ 3] * data[i- 4];
401 sum += qlp_coeff[ 2] * data[i- 3];
402 sum += qlp_coeff[ 1] * data[i- 2];
403 sum += qlp_coeff[ 0] * data[i- 1];
411 void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
423 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0)); // 0 0 q[1] q[0]
424 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2)); // 0 0 q[3] q[2]
425 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4)); // 0 0 q[5] q[4]
426 xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6)); // 0 0 q[7] q[6]
427 xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8)); // 0 0 q[9] q[8]
428 xmm5 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0 0 q[11] q[10]
439 //sum += qlp_coeff[11] * data[i-12];
440 //sum += qlp_coeff[10] * data[i-11];
445 //sum += qlp_coeff[9] * data[i-10];
446 //sum += qlp_coeff[8] * data[i-9];
452 //sum += qlp_coeff[7] * data[i-8];
453 //sum += qlp_coeff[6] * data[i-7];
459 //sum += qlp_coeff[5] * data[i-6];
460 //sum += qlp_coeff[4] * data[i-5];
466 //sum += qlp_coeff[3] * data[i-4];
467 //sum += qlp_coeff[2] * data[i-3];
473 //sum += qlp_coeff[1] * data[i-2];
474 //sum += qlp_coeff[0] * data[i-1];
486 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
487 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
488 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
489 xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
490 xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
491 xmm5 = _mm_cvtsi32_si128(qlp_coeff[10]);
501 //sum = qlp_coeff[10] * data[i-11];
505 //sum += qlp_coeff[9] * data[i-10];
506 //sum += qlp_coeff[8] * data[i-9];
512 //sum += qlp_coeff[7] * data[i-8];
513 //sum += qlp_coeff[6] * data[i-7];
519 //sum += qlp_coeff[5] * data[i-6];
520 //sum += qlp_coeff[4] * data[i-5];
526 //sum += qlp_coeff[3] * data[i-4];
527 //sum += qlp_coeff[2] * data[i-3];
533 //sum += qlp_coeff[1] * data[i-2];
534 //sum += qlp_coeff[0] * data[i-1];
548 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
549 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
550 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
551 xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
552 xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
562 //sum += qlp_coeff[9] * data[i-10];
563 //sum += qlp_coeff[8] * data[i-9];
568 //sum += qlp_coeff[7] * data[i-8];
569 //sum += qlp_coeff[6] * data[i-7];
575 //sum += qlp_coeff[5] * data[i-6];
576 //sum += qlp_coeff[4] * data[i-5];
582 //sum += qlp_coeff[3] * data[i-4];
583 //sum += qlp_coeff[2] * data[i-3];
589 //sum += qlp_coeff[1] * data[i-2];
590 //sum += qlp_coeff[0] * data[i-1];
602 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
603 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
604 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
605 xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
606 xmm4 = _mm_cvtsi32_si128(qlp_coeff[8]);
615 //sum = qlp_coeff[8] * data[i-9];
619 //sum += qlp_coeff[7] * data[i-8];
620 //sum += qlp_coeff[6] * data[i-7];
626 //sum += qlp_coeff[5] * data[i-6];
627 //sum += qlp_coeff[4] * data[i-5];
633 //sum += qlp_coeff[3] * data[i-4];
634 //sum += qlp_coeff[2] * data[i-3];
640 //sum += qlp_coeff[1] * data[i-2];
641 //sum += qlp_coeff[0] * data[i-1];
657 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
658 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
659 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
660 xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
669 //sum += qlp_coeff[7] * data[i-8];
670 //sum += qlp_coeff[6] * data[i-7];
675 //sum += qlp_coeff[5] * data[i-6];
676 //sum += qlp_coeff[4] * data[i-5];
682 //sum += qlp_coeff[3] * data[i-4];
683 //sum += qlp_coeff[2] * data[i-3];
689 //sum += qlp_coeff[1] * data[i-2];
690 //sum += qlp_coeff[0] * data[i-1];
702 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
703 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
704 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
705 xmm3 = _mm_cvtsi32_si128(qlp_coeff[6]);
713 //sum = qlp_coeff[6] * data[i-7];
717 //sum += qlp_coeff[5] * data[i-6];
718 //sum += qlp_coeff[4] * data[i-5];
724 //sum += qlp_coeff[3] * data[i-4];
725 //sum += qlp_coeff[2] * data[i-3];
731 //sum += qlp_coeff[1] * data[i-2];
732 //sum += qlp_coeff[0] * data[i-1];
746 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
747 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
748 xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
756 //sum += qlp_coeff[5] * data[i-6];
757 //sum += qlp_coeff[4] * data[i-5];
762 //sum += qlp_coeff[3] * data[i-4];
763 //sum += qlp_coeff[2] * data[i-3];
769 //sum += qlp_coeff[1] * data[i-2];
770 //sum += qlp_coeff[0] * data[i-1];
782 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
783 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
784 xmm2 = _mm_cvtsi32_si128(qlp_coeff[4]);
791 //sum = qlp_coeff[4] * data[i-5];
795 //sum += qlp_coeff[3] * data[i-4];
796 //sum += qlp_coeff[2] * data[i-3];
802 //sum += qlp_coeff[1] * data[i-2];
803 //sum += qlp_coeff[0] * data[i-1];
819 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
820 xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
827 //sum += qlp_coeff[3] * data[i-4];
828 //sum += qlp_coeff[2] * data[i-3];
833 //sum += qlp_coeff[1] * data[i-2];
834 //sum += qlp_coeff[0] * data[i-1];
846 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
847 xmm1 = _mm_cvtsi32_si128(qlp_coeff[2]);
853 //sum = qlp_coeff[2] * data[i-3];
857 //sum += qlp_coeff[1] * data[i-2];
858 //sum += qlp_coeff[0] * data[i-1];
872 xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
877 //sum += qlp_coeff[1] * data[i-2];
878 //sum += qlp_coeff[0] * data[i-1];
889 residual[i] = data[i] - ((qlp_coeff[0] * data[i-1]) >> lp_quantization);
899 case 32: sum += qlp_coeff[31] * data[i-32];
900 case 31: sum += qlp_coeff[30] * data[i-31];
901 case 30: sum += qlp_coeff[29] * data[i-30];
902 case 29: sum += qlp_coeff[28] * data[i-29];
903 case 28: sum += qlp_coeff[27] * data[i-28];
904 case 27: sum += qlp_coeff[26] * data[i-27];
905 case 26: sum += qlp_coeff[25] * data[i-26];
906 case 25: sum += qlp_coeff[24] * data[i-25];
907 case 24: sum += qlp_coeff[23] * data[i-24];
908 case 23: sum += qlp_coeff[22] * data[i-23];
909 case 22: sum += qlp_coeff[21] * data[i-22];
910 case 21: sum += qlp_coeff[20] * data[i-21];
911 case 20: sum += qlp_coeff[19] * data[i-20];
912 case 19: sum += qlp_coeff[18] * data[i-19];
913 case 18: sum += qlp_coeff[17] * data[i-18];
914 case 17: sum += qlp_coeff[16] * data[i-17];
915 case 16: sum += qlp_coeff[15] * data[i-16];
916 case 15: sum += qlp_coeff[14] * data[i-15];
917 case 14: sum += qlp_coeff[13] * data[i-14];
918 case 13: sum += qlp_coeff[12] * data[i-13];
919 sum += qlp_coeff[11] * data[i-12];
920 sum += qlp_coeff[10] * data[i-11];
921 sum += qlp_coeff[ 9] * data[i-10];
922 sum += qlp_coeff[ 8] * data[i- 9];
923 sum += qlp_coeff[ 7] * data[i- 8];
924 sum += qlp_coeff[ 6] * data[i- 7];
925 sum += qlp_coeff[ 5] * data[i- 6];
926 sum += qlp_coeff[ 4] * data[i- 5];
927 sum += qlp_coeff[ 3] * data[i- 4];
928 sum += qlp_coeff[ 2] * data[i- 3];
929 sum += qlp_coeff[ 1] * data[i- 2];
930 sum += qlp_coeff[ 0] * data[i- 1];
940 void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
943 FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
955 xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0));
956 xmm6 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4));
957 xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+8)); /* read 0 to 3 uninitialized coeffs... */
984 /* xmm0, xmm1: qlp_coeff
985 xmm2, xmm7: qlp_coeff << 16 bit
1017 /* 1 _mm_slli_si128 per data element less but we need shifted qlp_coeff in xmm2:xmm7 */
1049 xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0));
1050 xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4));
1059 /* xmm0: qlp_coeff