1 /* 2 * jidctint.c 3 * 4 * Copyright (C) 1991-1998, Thomas G. Lane. 5 * Modification developed 2002-2009 by Guido Vollbeding. 6 * This file is part of the Independent JPEG Group's software. 7 * For conditions of distribution and use, see the accompanying README file. 8 * 9 * This file contains a slow-but-accurate integer implementation of the 10 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 11 * must also perform dequantization of the input coefficients. 12 * 13 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 14 * on each row (or vice versa, but it's more convenient to emit a row at 15 * a time). Direct algorithms are also available, but they are much more 16 * complex and seem not to be any faster when reduced to code. 17 * 18 * This implementation is based on an algorithm described in 19 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 20 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 21 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 22 * The primary algorithm described there uses 11 multiplies and 29 adds. 23 * We use their alternate method with 12 multiplies and 32 adds. 24 * The advantage of this method is that no data path contains more than one 25 * multiplication; this allows a very simple and accurate implementation in 26 * scaled fixed-point arithmetic, with a minimal number of shifts. 27 * 28 * We also provide IDCT routines with various output sample block sizes for 29 * direct resolution reduction or enlargement without additional resampling: 30 * NxN (N=1...16) pixels for one 8x8 input DCT block. 31 * 32 * For N<8 we simply take the corresponding low-frequency coefficients of 33 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block 34 * to yield the downscaled outputs. 35 * This can be seen as direct low-pass downsampling from the DCT domain 36 * point of view rather than the usual spatial domain point of view, 37 * yielding significant computational savings and results at least 38 * as good as common bilinear (averaging) spatial downsampling. 39 * 40 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as 41 * lower frequencies and higher frequencies assumed to be zero. 42 * It turns out that the computational effort is similar to the 8x8 IDCT 43 * regarding the output size. 44 * Furthermore, the scaling and descaling is the same for all IDCT sizes. 45 * 46 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 47 * since there would be too many additional constants to pre-calculate. 48 */ 49 50 #define JPEG_INTERNALS 51 #include "jinclude.h" 52 #include "jpeglib.h" 53 #include "jdct.h" /* Private declarations for DCT subsystem */ 54 55 #ifdef DCT_ISLOW_SUPPORTED 56 57 58 /* 59 * This module is specialized to the case DCTSIZE = 8. 60 */ 61 62 #if DCTSIZE != 8 63 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 64 #endif 65 66 67 /* 68 * The poop on this scaling stuff is as follows: 69 * 70 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 71 * larger than the true IDCT outputs. The final outputs are therefore 72 * a factor of N larger than desired; since N=8 this can be cured by 73 * a simple right shift at the end of the algorithm. The advantage of 74 * this arrangement is that we save two multiplications per 1-D IDCT, 75 * because the y0 and y4 inputs need not be divided by sqrt(N). 76 * 77 * We have to do addition and subtraction of the integer inputs, which 78 * is no problem, and multiplication by fractional constants, which is 79 * a problem to do in integer arithmetic. We multiply all the constants 80 * by CONST_SCALE and convert them to integer constants (thus retaining 81 * CONST_BITS bits of precision in the constants). After doing a 82 * multiplication we have to divide the product by CONST_SCALE, with proper 83 * rounding, to produce the correct output. This division can be done 84 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 85 * as long as possible so that partial sums can be added together with 86 * full fractional precision. 87 * 88 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 89 * they are represented to better-than-integral precision. These outputs 90 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 91 * with the recommended scaling. (To scale up 12-bit sample data further, an 92 * intermediate INT32 array would be needed.) 93 * 94 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 95 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 96 * shows that the values given below are the most effective. 97 */ 98 99 #if BITS_IN_JSAMPLE == 8 100 #define CONST_BITS 13 101 #define PASS1_BITS 2 102 #else 103 #define CONST_BITS 13 104 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 105 #endif 106 107 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 108 * causing a lot of useless floating-point operations at run time. 109 * To get around this we use the following pre-calculated constants. 110 * If you change CONST_BITS you may want to add appropriate values. 111 * (With a reasonable C compiler, you can just rely on the FIX() macro...) 112 */ 113 114 #if CONST_BITS == 13 115 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 116 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 117 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 118 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 119 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 120 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 121 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 122 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 123 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 124 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 125 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 126 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 127 #else 128 #define FIX_0_298631336 FIX(0.298631336) 129 #define FIX_0_390180644 FIX(0.390180644) 130 #define FIX_0_541196100 FIX(0.541196100) 131 #define FIX_0_765366865 FIX(0.765366865) 132 #define FIX_0_899976223 FIX(0.899976223) 133 #define FIX_1_175875602 FIX(1.175875602) 134 #define FIX_1_501321110 FIX(1.501321110) 135 #define FIX_1_847759065 FIX(1.847759065) 136 #define FIX_1_961570560 FIX(1.961570560) 137 #define FIX_2_053119869 FIX(2.053119869) 138 #define FIX_2_562915447 FIX(2.562915447) 139 #define FIX_3_072711026 FIX(3.072711026) 140 #endif 141 142 143 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 144 * For 8-bit samples with the recommended scaling, all the variable 145 * and constant values involved are no more than 16 bits wide, so a 146 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 147 * For 12-bit samples, a full 32-bit multiplication will be needed. 148 */ 149 150 #if BITS_IN_JSAMPLE == 8 151 #define MULTIPLY(var,const) MULTIPLY16C16(var,const) 152 #else 153 #define MULTIPLY(var,const) ((var) * (const)) 154 #endif 155 156 157 /* Dequantize a coefficient by multiplying it by the multiplier-table 158 * entry; produce an int result. In this module, both inputs and result 159 * are 16 bits or less, so either int or short multiply will work. 160 */ 161 162 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) 163 164 165 /* 166 * Perform dequantization and inverse DCT on one block of coefficients. 167 */ 168 169 GLOBAL(void) 170 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 171 JCOEFPTR coef_block, 172 JSAMPARRAY output_buf, JDIMENSION output_col) 173 { 174 INT32 tmp0, tmp1, tmp2, tmp3; 175 INT32 tmp10, tmp11, tmp12, tmp13; 176 INT32 z1, z2, z3, z4, z5; 177 JCOEFPTR inptr; 178 ISLOW_MULT_TYPE * quantptr; 179 int * wsptr; 180 JSAMPROW outptr; 181 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 182 int ctr; 183 int workspace[DCTSIZE2]; /* buffers data between passes */ 184 SHIFT_TEMPS 185 186 /* Pass 1: process columns from input, store into work array. */ 187 /* Note results are scaled up by sqrt(8) compared to a true IDCT; */ 188 /* furthermore, we scale the results by 2**PASS1_BITS. */ 189 190 inptr = coef_block; 191 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 192 wsptr = workspace; 193 for (ctr = DCTSIZE; ctr > 0; ctr--) { 194 /* Due to quantization, we will usually find that many of the input 195 * coefficients are zero, especially the AC terms. We can exploit this 196 * by short-circuiting the IDCT calculation for any column in which all 197 * the AC terms are zero. In that case each output is equal to the 198 * DC coefficient (with scale factor as needed). 199 * With typical images and quantization tables, half or more of the 200 * column DCT calculations can be simplified this way. 201 */ 202 203 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 204 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 205 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 206 inptr[DCTSIZE*7] == 0) { 207 /* AC terms all zero */ 208 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 209 210 wsptr[DCTSIZE*0] = dcval; 211 wsptr[DCTSIZE*1] = dcval; 212 wsptr[DCTSIZE*2] = dcval; 213 wsptr[DCTSIZE*3] = dcval; 214 wsptr[DCTSIZE*4] = dcval; 215 wsptr[DCTSIZE*5] = dcval; 216 wsptr[DCTSIZE*6] = dcval; 217 wsptr[DCTSIZE*7] = dcval; 218 219 inptr++; /* advance pointers to next column */ 220 quantptr++; 221 wsptr++; 222 continue; 223 } 224 225 /* Even part: reverse the even part of the forward DCT. */ 226 /* The rotator is sqrt(2)*c(-6). */ 227 228 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 229 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 230 231 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); 232 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); 233 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); 234 235 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 236 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 237 238 tmp0 = (z2 + z3) << CONST_BITS; 239 tmp1 = (z2 - z3) << CONST_BITS; 240 241 tmp10 = tmp0 + tmp3; 242 tmp13 = tmp0 - tmp3; 243 tmp11 = tmp1 + tmp2; 244 tmp12 = tmp1 - tmp2; 245 246 /* Odd part per figure 8; the matrix is unitary and hence its 247 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 248 */ 249 250 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 251 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 252 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 253 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 254 255 z1 = tmp0 + tmp3; 256 z2 = tmp1 + tmp2; 257 z3 = tmp0 + tmp2; 258 z4 = tmp1 + tmp3; 259 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ 260 261 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ 262 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ 263 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ 264 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ 265 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ 266 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ 267 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ 268 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ 269 270 z3 += z5; 271 z4 += z5; 272 273 tmp0 += z1 + z3; 274 tmp1 += z2 + z4; 275 tmp2 += z2 + z3; 276 tmp3 += z1 + z4; 277 278 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 279 280 wsptr[DCTSIZE*0] = (int) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 281 wsptr[DCTSIZE*7] = (int) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 282 wsptr[DCTSIZE*1] = (int) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 283 wsptr[DCTSIZE*6] = (int) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 284 wsptr[DCTSIZE*2] = (int) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 285 wsptr[DCTSIZE*5] = (int) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 286 wsptr[DCTSIZE*3] = (int) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 287 wsptr[DCTSIZE*4] = (int) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 288 289 inptr++; /* advance pointers to next column */ 290 quantptr++; 291 wsptr++; 292 } 293 294 /* Pass 2: process rows from work array, store into output array. */ 295 /* Note that we must descale the results by a factor of 8 == 2**3, */ 296 /* and also undo the PASS1_BITS scaling. */ 297 298 wsptr = workspace; 299 for (ctr = 0; ctr < DCTSIZE; ctr++) { 300 outptr = output_buf[ctr] + output_col; 301 /* Rows of zeroes can be exploited in the same way as we did with columns. 302 * However, the column calculation has created many nonzero AC terms, so 303 * the simplification applies less often (typically 5% to 10% of the time). 304 * On machines with very fast multiplication, it's possible that the 305 * test takes more time than it's worth. In that case this section 306 * may be commented out. 307 */ 308 309 #ifndef NO_ZERO_ROW_TEST 310 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && 311 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { 312 /* AC terms all zero */ 313 JSAMPLE dcval = range_limit[(int) DESCALE((INT32) wsptr[0], PASS1_BITS+3) 314 & RANGE_MASK]; 315 316 outptr[0] = dcval; 317 outptr[1] = dcval; 318 outptr[2] = dcval; 319 outptr[3] = dcval; 320 outptr[4] = dcval; 321 outptr[5] = dcval; 322 outptr[6] = dcval; 323 outptr[7] = dcval; 324 325 wsptr += DCTSIZE; /* advance pointer to next row */ 326 continue; 327 } 328 #endif 329 330 /* Even part: reverse the even part of the forward DCT. */ 331 /* The rotator is sqrt(2)*c(-6). */ 332 333 z2 = (INT32) wsptr[2]; 334 z3 = (INT32) wsptr[6]; 335 336 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); 337 tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); 338 tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); 339 340 tmp0 = ((INT32) wsptr[0] + (INT32) wsptr[4]) << CONST_BITS; 341 tmp1 = ((INT32) wsptr[0] - (INT32) wsptr[4]) << CONST_BITS; 342 343 tmp10 = tmp0 + tmp3; 344 tmp13 = tmp0 - tmp3; 345 tmp11 = tmp1 + tmp2; 346 tmp12 = tmp1 - tmp2; 347 348 /* Odd part per figure 8; the matrix is unitary and hence its 349 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 350 */ 351 352 tmp0 = (INT32) wsptr[7]; 353 tmp1 = (INT32) wsptr[5]; 354 tmp2 = (INT32) wsptr[3]; 355 tmp3 = (INT32) wsptr[1]; 356 357 z1 = tmp0 + tmp3; 358 z2 = tmp1 + tmp2; 359 z3 = tmp0 + tmp2; 360 z4 = tmp1 + tmp3; 361 z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */ 362 363 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */ 364 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */ 365 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */ 366 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */ 367 z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */ 368 z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */ 369 z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */ 370 z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */ 371 372 z3 += z5; 373 z4 += z5; 374 375 tmp0 += z1 + z3; 376 tmp1 += z2 + z4; 377 tmp2 += z2 + z3; 378 tmp3 += z1 + z4; 379 380 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 381 382 outptr[0] = range_limit[(int) DESCALE(tmp10 + tmp3, 383 CONST_BITS+PASS1_BITS+3) 384 & RANGE_MASK]; 385 outptr[7] = range_limit[(int) DESCALE(tmp10 - tmp3, 386 CONST_BITS+PASS1_BITS+3) 387 & RANGE_MASK]; 388 outptr[1] = range_limit[(int) DESCALE(tmp11 + tmp2, 389 CONST_BITS+PASS1_BITS+3) 390 & RANGE_MASK]; 391 outptr[6] = range_limit[(int) DESCALE(tmp11 - tmp2, 392 CONST_BITS+PASS1_BITS+3) 393 & RANGE_MASK]; 394 outptr[2] = range_limit[(int) DESCALE(tmp12 + tmp1, 395 CONST_BITS+PASS1_BITS+3) 396 & RANGE_MASK]; 397 outptr[5] = range_limit[(int) DESCALE(tmp12 - tmp1, 398 CONST_BITS+PASS1_BITS+3) 399 & RANGE_MASK]; 400 outptr[3] = range_limit[(int) DESCALE(tmp13 + tmp0, 401 CONST_BITS+PASS1_BITS+3) 402 & RANGE_MASK]; 403 outptr[4] = range_limit[(int) DESCALE(tmp13 - tmp0, 404 CONST_BITS+PASS1_BITS+3) 405 & RANGE_MASK]; 406 407 wsptr += DCTSIZE; /* advance pointer to next row */ 408 } 409 } 410 411 #ifdef IDCT_SCALING_SUPPORTED 412 413 414 /* 415 * Perform dequantization and inverse DCT on one block of coefficients, 416 * producing a 7x7 output block. 417 * 418 * Optimized algorithm with 12 multiplications in the 1-D kernel. 419 * cK represents sqrt(2) * cos(K*pi/14). 420 */ 421 422 GLOBAL(void) 423 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 424 JCOEFPTR coef_block, 425 JSAMPARRAY output_buf, JDIMENSION output_col) 426 { 427 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; 428 INT32 z1, z2, z3; 429 JCOEFPTR inptr; 430 ISLOW_MULT_TYPE * quantptr; 431 int * wsptr; 432 JSAMPROW outptr; 433 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 434 int ctr; 435 int workspace[7*7]; /* buffers data between passes */ 436 SHIFT_TEMPS 437 438 /* Pass 1: process columns from input, store into work array. */ 439 440 inptr = coef_block; 441 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 442 wsptr = workspace; 443 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 444 /* Even part */ 445 446 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 447 tmp13 <<= CONST_BITS; 448 /* Add fudge factor here for final descale. */ 449 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); 450 451 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 452 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 453 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 454 455 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 456 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 457 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 458 tmp0 = z1 + z3; 459 z2 -= tmp0; 460 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 461 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 462 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 463 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 464 465 /* Odd part */ 466 467 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 468 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 469 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 470 471 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 472 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 473 tmp0 = tmp1 - tmp2; 474 tmp1 += tmp2; 475 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 476 tmp1 += tmp2; 477 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 478 tmp0 += z2; 479 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 480 481 /* Final output stage */ 482 483 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 484 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 485 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 486 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 487 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 488 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 489 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); 490 } 491 492 /* Pass 2: process 7 rows from work array, store into output array. */ 493 494 wsptr = workspace; 495 for (ctr = 0; ctr < 7; ctr++) { 496 outptr = output_buf[ctr] + output_col; 497 498 /* Even part */ 499 500 /* Add fudge factor here for final descale. */ 501 tmp13 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 502 tmp13 <<= CONST_BITS; 503 504 z1 = (INT32) wsptr[2]; 505 z2 = (INT32) wsptr[4]; 506 z3 = (INT32) wsptr[6]; 507 508 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 509 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 510 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 511 tmp0 = z1 + z3; 512 z2 -= tmp0; 513 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 514 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 515 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 516 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 517 518 /* Odd part */ 519 520 z1 = (INT32) wsptr[1]; 521 z2 = (INT32) wsptr[3]; 522 z3 = (INT32) wsptr[5]; 523 524 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 525 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 526 tmp0 = tmp1 - tmp2; 527 tmp1 += tmp2; 528 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 529 tmp1 += tmp2; 530 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 531 tmp0 += z2; 532 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 533 534 /* Final output stage */ 535 536 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 537 CONST_BITS+PASS1_BITS+3) 538 & RANGE_MASK]; 539 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 540 CONST_BITS+PASS1_BITS+3) 541 & RANGE_MASK]; 542 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 543 CONST_BITS+PASS1_BITS+3) 544 & RANGE_MASK]; 545 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 546 CONST_BITS+PASS1_BITS+3) 547 & RANGE_MASK]; 548 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 549 CONST_BITS+PASS1_BITS+3) 550 & RANGE_MASK]; 551 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 552 CONST_BITS+PASS1_BITS+3) 553 & RANGE_MASK]; 554 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, 555 CONST_BITS+PASS1_BITS+3) 556 & RANGE_MASK]; 557 558 wsptr += 7; /* advance pointer to next row */ 559 } 560 } 561 562 563 /* 564 * Perform dequantization and inverse DCT on one block of coefficients, 565 * producing a reduced-size 6x6 output block. 566 * 567 * Optimized algorithm with 3 multiplications in the 1-D kernel. 568 * cK represents sqrt(2) * cos(K*pi/12). 569 */ 570 571 GLOBAL(void) 572 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 573 JCOEFPTR coef_block, 574 JSAMPARRAY output_buf, JDIMENSION output_col) 575 { 576 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 577 INT32 z1, z2, z3; 578 JCOEFPTR inptr; 579 ISLOW_MULT_TYPE * quantptr; 580 int * wsptr; 581 JSAMPROW outptr; 582 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 583 int ctr; 584 int workspace[6*6]; /* buffers data between passes */ 585 SHIFT_TEMPS 586 587 /* Pass 1: process columns from input, store into work array. */ 588 589 inptr = coef_block; 590 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 591 wsptr = workspace; 592 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 593 /* Even part */ 594 595 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 596 tmp0 <<= CONST_BITS; 597 /* Add fudge factor here for final descale. */ 598 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 599 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 600 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 601 tmp1 = tmp0 + tmp10; 602 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 603 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 604 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 605 tmp10 = tmp1 + tmp0; 606 tmp12 = tmp1 - tmp0; 607 608 /* Odd part */ 609 610 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 611 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 612 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 613 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 614 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 615 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 616 tmp1 = (z1 - z2 - z3) << PASS1_BITS; 617 618 /* Final output stage */ 619 620 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 621 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 622 wsptr[6*1] = (int) (tmp11 + tmp1); 623 wsptr[6*4] = (int) (tmp11 - tmp1); 624 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 625 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 626 } 627 628 /* Pass 2: process 6 rows from work array, store into output array. */ 629 630 wsptr = workspace; 631 for (ctr = 0; ctr < 6; ctr++) { 632 outptr = output_buf[ctr] + output_col; 633 634 /* Even part */ 635 636 /* Add fudge factor here for final descale. */ 637 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 638 tmp0 <<= CONST_BITS; 639 tmp2 = (INT32) wsptr[4]; 640 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 641 tmp1 = tmp0 + tmp10; 642 tmp11 = tmp0 - tmp10 - tmp10; 643 tmp10 = (INT32) wsptr[2]; 644 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 645 tmp10 = tmp1 + tmp0; 646 tmp12 = tmp1 - tmp0; 647 648 /* Odd part */ 649 650 z1 = (INT32) wsptr[1]; 651 z2 = (INT32) wsptr[3]; 652 z3 = (INT32) wsptr[5]; 653 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 654 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 655 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 656 tmp1 = (z1 - z2 - z3) << CONST_BITS; 657 658 /* Final output stage */ 659 660 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 661 CONST_BITS+PASS1_BITS+3) 662 & RANGE_MASK]; 663 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 664 CONST_BITS+PASS1_BITS+3) 665 & RANGE_MASK]; 666 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 667 CONST_BITS+PASS1_BITS+3) 668 & RANGE_MASK]; 669 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 670 CONST_BITS+PASS1_BITS+3) 671 & RANGE_MASK]; 672 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 673 CONST_BITS+PASS1_BITS+3) 674 & RANGE_MASK]; 675 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 676 CONST_BITS+PASS1_BITS+3) 677 & RANGE_MASK]; 678 679 wsptr += 6; /* advance pointer to next row */ 680 } 681 } 682 683 684 /* 685 * Perform dequantization and inverse DCT on one block of coefficients, 686 * producing a reduced-size 5x5 output block. 687 * 688 * Optimized algorithm with 5 multiplications in the 1-D kernel. 689 * cK represents sqrt(2) * cos(K*pi/10). 690 */ 691 692 GLOBAL(void) 693 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 694 JCOEFPTR coef_block, 695 JSAMPARRAY output_buf, JDIMENSION output_col) 696 { 697 INT32 tmp0, tmp1, tmp10, tmp11, tmp12; 698 INT32 z1, z2, z3; 699 JCOEFPTR inptr; 700 ISLOW_MULT_TYPE * quantptr; 701 int * wsptr; 702 JSAMPROW outptr; 703 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 704 int ctr; 705 int workspace[5*5]; /* buffers data between passes */ 706 SHIFT_TEMPS 707 708 /* Pass 1: process columns from input, store into work array. */ 709 710 inptr = coef_block; 711 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 712 wsptr = workspace; 713 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 714 /* Even part */ 715 716 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 717 tmp12 <<= CONST_BITS; 718 /* Add fudge factor here for final descale. */ 719 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 720 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 721 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 722 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 723 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 724 z3 = tmp12 + z2; 725 tmp10 = z3 + z1; 726 tmp11 = z3 - z1; 727 tmp12 -= z2 << 2; 728 729 /* Odd part */ 730 731 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 732 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 733 734 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 735 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 736 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 737 738 /* Final output stage */ 739 740 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 741 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 742 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 743 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 744 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 745 } 746 747 /* Pass 2: process 5 rows from work array, store into output array. */ 748 749 wsptr = workspace; 750 for (ctr = 0; ctr < 5; ctr++) { 751 outptr = output_buf[ctr] + output_col; 752 753 /* Even part */ 754 755 /* Add fudge factor here for final descale. */ 756 tmp12 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 757 tmp12 <<= CONST_BITS; 758 tmp0 = (INT32) wsptr[2]; 759 tmp1 = (INT32) wsptr[4]; 760 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 761 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 762 z3 = tmp12 + z2; 763 tmp10 = z3 + z1; 764 tmp11 = z3 - z1; 765 tmp12 -= z2 << 2; 766 767 /* Odd part */ 768 769 z2 = (INT32) wsptr[1]; 770 z3 = (INT32) wsptr[3]; 771 772 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 773 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 774 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 775 776 /* Final output stage */ 777 778 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 779 CONST_BITS+PASS1_BITS+3) 780 & RANGE_MASK]; 781 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 782 CONST_BITS+PASS1_BITS+3) 783 & RANGE_MASK]; 784 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 785 CONST_BITS+PASS1_BITS+3) 786 & RANGE_MASK]; 787 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 788 CONST_BITS+PASS1_BITS+3) 789 & RANGE_MASK]; 790 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 791 CONST_BITS+PASS1_BITS+3) 792 & RANGE_MASK]; 793 794 wsptr += 5; /* advance pointer to next row */ 795 } 796 } 797 798 799 /* 800 * Perform dequantization and inverse DCT on one block of coefficients, 801 * producing a reduced-size 3x3 output block. 802 * 803 * Optimized algorithm with 2 multiplications in the 1-D kernel. 804 * cK represents sqrt(2) * cos(K*pi/6). 805 */ 806 807 GLOBAL(void) 808 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 809 JCOEFPTR coef_block, 810 JSAMPARRAY output_buf, JDIMENSION output_col) 811 { 812 INT32 tmp0, tmp2, tmp10, tmp12; 813 JCOEFPTR inptr; 814 ISLOW_MULT_TYPE * quantptr; 815 int * wsptr; 816 JSAMPROW outptr; 817 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 818 int ctr; 819 int workspace[3*3]; /* buffers data between passes */ 820 SHIFT_TEMPS 821 822 /* Pass 1: process columns from input, store into work array. */ 823 824 inptr = coef_block; 825 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 826 wsptr = workspace; 827 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 828 /* Even part */ 829 830 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 831 tmp0 <<= CONST_BITS; 832 /* Add fudge factor here for final descale. */ 833 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 834 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 835 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 836 tmp10 = tmp0 + tmp12; 837 tmp2 = tmp0 - tmp12 - tmp12; 838 839 /* Odd part */ 840 841 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 842 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 843 844 /* Final output stage */ 845 846 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 847 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 848 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); 849 } 850 851 /* Pass 2: process 3 rows from work array, store into output array. */ 852 853 wsptr = workspace; 854 for (ctr = 0; ctr < 3; ctr++) { 855 outptr = output_buf[ctr] + output_col; 856 857 /* Even part */ 858 859 /* Add fudge factor here for final descale. */ 860 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 861 tmp0 <<= CONST_BITS; 862 tmp2 = (INT32) wsptr[2]; 863 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 864 tmp10 = tmp0 + tmp12; 865 tmp2 = tmp0 - tmp12 - tmp12; 866 867 /* Odd part */ 868 869 tmp12 = (INT32) wsptr[1]; 870 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 871 872 /* Final output stage */ 873 874 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 875 CONST_BITS+PASS1_BITS+3) 876 & RANGE_MASK]; 877 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 878 CONST_BITS+PASS1_BITS+3) 879 & RANGE_MASK]; 880 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 881 CONST_BITS+PASS1_BITS+3) 882 & RANGE_MASK]; 883 884 wsptr += 3; /* advance pointer to next row */ 885 } 886 } 887 888 889 /* 890 * Perform dequantization and inverse DCT on one block of coefficients, 891 * producing a 9x9 output block. 892 * 893 * Optimized algorithm with 10 multiplications in the 1-D kernel. 894 * cK represents sqrt(2) * cos(K*pi/18). 895 */ 896 897 GLOBAL(void) 898 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 899 JCOEFPTR coef_block, 900 JSAMPARRAY output_buf, JDIMENSION output_col) 901 { 902 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; 903 INT32 z1, z2, z3, z4; 904 JCOEFPTR inptr; 905 ISLOW_MULT_TYPE * quantptr; 906 int * wsptr; 907 JSAMPROW outptr; 908 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 909 int ctr; 910 int workspace[8*9]; /* buffers data between passes */ 911 SHIFT_TEMPS 912 913 /* Pass 1: process columns from input, store into work array. */ 914 915 inptr = coef_block; 916 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 917 wsptr = workspace; 918 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 919 /* Even part */ 920 921 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 922 tmp0 <<= CONST_BITS; 923 /* Add fudge factor here for final descale. */ 924 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 925 926 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 927 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 928 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 929 930 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 931 tmp1 = tmp0 + tmp3; 932 tmp2 = tmp0 - tmp3 - tmp3; 933 934 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 935 tmp11 = tmp2 + tmp0; 936 tmp14 = tmp2 - tmp0 - tmp0; 937 938 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 939 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 940 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 941 942 tmp10 = tmp1 + tmp0 - tmp3; 943 tmp12 = tmp1 - tmp0 + tmp2; 944 tmp13 = tmp1 - tmp2 + tmp3; 945 946 /* Odd part */ 947 948 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 949 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 950 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 951 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 952 953 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 954 955 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 956 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 957 tmp0 = tmp2 + tmp3 - z2; 958 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 959 tmp2 += z2 - tmp1; 960 tmp3 += z2 + tmp1; 961 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 962 963 /* Final output stage */ 964 965 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 966 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 967 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 968 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 969 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 970 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 971 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); 972 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); 973 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); 974 } 975 976 /* Pass 2: process 9 rows from work array, store into output array. */ 977 978 wsptr = workspace; 979 for (ctr = 0; ctr < 9; ctr++) { 980 outptr = output_buf[ctr] + output_col; 981 982 /* Even part */ 983 984 /* Add fudge factor here for final descale. */ 985 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 986 tmp0 <<= CONST_BITS; 987 988 z1 = (INT32) wsptr[2]; 989 z2 = (INT32) wsptr[4]; 990 z3 = (INT32) wsptr[6]; 991 992 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 993 tmp1 = tmp0 + tmp3; 994 tmp2 = tmp0 - tmp3 - tmp3; 995 996 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 997 tmp11 = tmp2 + tmp0; 998 tmp14 = tmp2 - tmp0 - tmp0; 999 1000 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 1001 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 1002 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 1003 1004 tmp10 = tmp1 + tmp0 - tmp3; 1005 tmp12 = tmp1 - tmp0 + tmp2; 1006 tmp13 = tmp1 - tmp2 + tmp3; 1007 1008 /* Odd part */ 1009 1010 z1 = (INT32) wsptr[1]; 1011 z2 = (INT32) wsptr[3]; 1012 z3 = (INT32) wsptr[5]; 1013 z4 = (INT32) wsptr[7]; 1014 1015 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 1016 1017 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 1018 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 1019 tmp0 = tmp2 + tmp3 - z2; 1020 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 1021 tmp2 += z2 - tmp1; 1022 tmp3 += z2 + tmp1; 1023 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 1024 1025 /* Final output stage */ 1026 1027 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 1028 CONST_BITS+PASS1_BITS+3) 1029 & RANGE_MASK]; 1030 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 1031 CONST_BITS+PASS1_BITS+3) 1032 & RANGE_MASK]; 1033 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 1034 CONST_BITS+PASS1_BITS+3) 1035 & RANGE_MASK]; 1036 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 1037 CONST_BITS+PASS1_BITS+3) 1038 & RANGE_MASK]; 1039 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 1040 CONST_BITS+PASS1_BITS+3) 1041 & RANGE_MASK]; 1042 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 1043 CONST_BITS+PASS1_BITS+3) 1044 & RANGE_MASK]; 1045 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, 1046 CONST_BITS+PASS1_BITS+3) 1047 & RANGE_MASK]; 1048 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, 1049 CONST_BITS+PASS1_BITS+3) 1050 & RANGE_MASK]; 1051 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, 1052 CONST_BITS+PASS1_BITS+3) 1053 & RANGE_MASK]; 1054 1055 wsptr += 8; /* advance pointer to next row */ 1056 } 1057 } 1058 1059 1060 /* 1061 * Perform dequantization and inverse DCT on one block of coefficients, 1062 * producing a 10x10 output block. 1063 * 1064 * Optimized algorithm with 12 multiplications in the 1-D kernel. 1065 * cK represents sqrt(2) * cos(K*pi/20). 1066 */ 1067 1068 GLOBAL(void) 1069 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1070 JCOEFPTR coef_block, 1071 JSAMPARRAY output_buf, JDIMENSION output_col) 1072 { 1073 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1074 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 1075 INT32 z1, z2, z3, z4, z5; 1076 JCOEFPTR inptr; 1077 ISLOW_MULT_TYPE * quantptr; 1078 int * wsptr; 1079 JSAMPROW outptr; 1080 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1081 int ctr; 1082 int workspace[8*10]; /* buffers data between passes */ 1083 SHIFT_TEMPS 1084 1085 /* Pass 1: process columns from input, store into work array. */ 1086 1087 inptr = coef_block; 1088 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1089 wsptr = workspace; 1090 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1091 /* Even part */ 1092 1093 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1094 z3 <<= CONST_BITS; 1095 /* Add fudge factor here for final descale. */ 1096 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1097 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1098 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1099 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1100 tmp10 = z3 + z1; 1101 tmp11 = z3 - z2; 1102 1103 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 1104 CONST_BITS-PASS1_BITS); 1105 1106 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1107 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1108 1109 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1110 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1111 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1112 1113 tmp20 = tmp10 + tmp12; 1114 tmp24 = tmp10 - tmp12; 1115 tmp21 = tmp11 + tmp13; 1116 tmp23 = tmp11 - tmp13; 1117 1118 /* Odd part */ 1119 1120 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1121 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1122 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1123 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1124 1125 tmp11 = z2 + z4; 1126 tmp13 = z2 - z4; 1127 1128 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1129 z5 = z3 << CONST_BITS; 1130 1131 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1132 z4 = z5 + tmp12; 1133 1134 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1135 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1136 1137 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1138 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1139 1140 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; 1141 1142 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 1143 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1144 1145 /* Final output stage */ 1146 1147 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1148 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1149 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1150 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1151 wsptr[8*2] = (int) (tmp22 + tmp12); 1152 wsptr[8*7] = (int) (tmp22 - tmp12); 1153 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1154 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1155 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1156 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1157 } 1158 1159 /* Pass 2: process 10 rows from work array, store into output array. */ 1160 1161 wsptr = workspace; 1162 for (ctr = 0; ctr < 10; ctr++) { 1163 outptr = output_buf[ctr] + output_col; 1164 1165 /* Even part */ 1166 1167 /* Add fudge factor here for final descale. */ 1168 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1169 z3 <<= CONST_BITS; 1170 z4 = (INT32) wsptr[4]; 1171 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 1172 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 1173 tmp10 = z3 + z1; 1174 tmp11 = z3 - z2; 1175 1176 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ 1177 1178 z2 = (INT32) wsptr[2]; 1179 z3 = (INT32) wsptr[6]; 1180 1181 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 1182 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 1183 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 1184 1185 tmp20 = tmp10 + tmp12; 1186 tmp24 = tmp10 - tmp12; 1187 tmp21 = tmp11 + tmp13; 1188 tmp23 = tmp11 - tmp13; 1189 1190 /* Odd part */ 1191 1192 z1 = (INT32) wsptr[1]; 1193 z2 = (INT32) wsptr[3]; 1194 z3 = (INT32) wsptr[5]; 1195 z3 <<= CONST_BITS; 1196 z4 = (INT32) wsptr[7]; 1197 1198 tmp11 = z2 + z4; 1199 tmp13 = z2 - z4; 1200 1201 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 1202 1203 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 1204 z4 = z3 + tmp12; 1205 1206 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 1207 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 1208 1209 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 1210 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 1211 1212 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; 1213 1214 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 1215 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 1216 1217 /* Final output stage */ 1218 1219 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1220 CONST_BITS+PASS1_BITS+3) 1221 & RANGE_MASK]; 1222 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1223 CONST_BITS+PASS1_BITS+3) 1224 & RANGE_MASK]; 1225 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1226 CONST_BITS+PASS1_BITS+3) 1227 & RANGE_MASK]; 1228 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1229 CONST_BITS+PASS1_BITS+3) 1230 & RANGE_MASK]; 1231 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1232 CONST_BITS+PASS1_BITS+3) 1233 & RANGE_MASK]; 1234 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1235 CONST_BITS+PASS1_BITS+3) 1236 & RANGE_MASK]; 1237 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1238 CONST_BITS+PASS1_BITS+3) 1239 & RANGE_MASK]; 1240 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1241 CONST_BITS+PASS1_BITS+3) 1242 & RANGE_MASK]; 1243 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1244 CONST_BITS+PASS1_BITS+3) 1245 & RANGE_MASK]; 1246 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1247 CONST_BITS+PASS1_BITS+3) 1248 & RANGE_MASK]; 1249 1250 wsptr += 8; /* advance pointer to next row */ 1251 } 1252 } 1253 1254 1255 /* 1256 * Perform dequantization and inverse DCT on one block of coefficients, 1257 * producing a 11x11 output block. 1258 * 1259 * Optimized algorithm with 24 multiplications in the 1-D kernel. 1260 * cK represents sqrt(2) * cos(K*pi/22). 1261 */ 1262 1263 GLOBAL(void) 1264 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1265 JCOEFPTR coef_block, 1266 JSAMPARRAY output_buf, JDIMENSION output_col) 1267 { 1268 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 1269 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1270 INT32 z1, z2, z3, z4; 1271 JCOEFPTR inptr; 1272 ISLOW_MULT_TYPE * quantptr; 1273 int * wsptr; 1274 JSAMPROW outptr; 1275 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1276 int ctr; 1277 int workspace[8*11]; /* buffers data between passes */ 1278 SHIFT_TEMPS 1279 1280 /* Pass 1: process columns from input, store into work array. */ 1281 1282 inptr = coef_block; 1283 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1284 wsptr = workspace; 1285 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1286 /* Even part */ 1287 1288 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1289 tmp10 <<= CONST_BITS; 1290 /* Add fudge factor here for final descale. */ 1291 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 1292 1293 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1294 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1295 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1296 1297 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1298 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1299 z4 = z1 + z3; 1300 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1301 z4 -= z2; 1302 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1303 tmp21 = tmp20 + tmp23 + tmp25 - 1304 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1305 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1306 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1307 tmp24 += tmp25; 1308 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 1309 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1310 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1311 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 1312 1313 /* Odd part */ 1314 1315 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1316 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1317 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1318 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1319 1320 tmp11 = z1 + z2; 1321 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1322 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1323 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1324 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1325 tmp10 = tmp11 + tmp12 + tmp13 - 1326 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 1327 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1328 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1329 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 1330 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1331 tmp11 += z1; 1332 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 1333 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1334 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 1335 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1336 1337 /* Final output stage */ 1338 1339 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1340 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1341 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1342 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1343 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1344 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1345 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1346 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1347 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1348 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1349 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); 1350 } 1351 1352 /* Pass 2: process 11 rows from work array, store into output array. */ 1353 1354 wsptr = workspace; 1355 for (ctr = 0; ctr < 11; ctr++) { 1356 outptr = output_buf[ctr] + output_col; 1357 1358 /* Even part */ 1359 1360 /* Add fudge factor here for final descale. */ 1361 tmp10 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1362 tmp10 <<= CONST_BITS; 1363 1364 z1 = (INT32) wsptr[2]; 1365 z2 = (INT32) wsptr[4]; 1366 z3 = (INT32) wsptr[6]; 1367 1368 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 1369 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 1370 z4 = z1 + z3; 1371 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 1372 z4 -= z2; 1373 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 1374 tmp21 = tmp20 + tmp23 + tmp25 - 1375 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 1376 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 1377 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 1378 tmp24 += tmp25; 1379 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 1380 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 1381 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 1382 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 1383 1384 /* Odd part */ 1385 1386 z1 = (INT32) wsptr[1]; 1387 z2 = (INT32) wsptr[3]; 1388 z3 = (INT32) wsptr[5]; 1389 z4 = (INT32) wsptr[7]; 1390 1391 tmp11 = z1 + z2; 1392 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 1393 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 1394 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 1395 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 1396 tmp10 = tmp11 + tmp12 + tmp13 - 1397 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 1398 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 1399 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 1400 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 1401 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 1402 tmp11 += z1; 1403 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 1404 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 1405 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 1406 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 1407 1408 /* Final output stage */ 1409 1410 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1411 CONST_BITS+PASS1_BITS+3) 1412 & RANGE_MASK]; 1413 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1414 CONST_BITS+PASS1_BITS+3) 1415 & RANGE_MASK]; 1416 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1417 CONST_BITS+PASS1_BITS+3) 1418 & RANGE_MASK]; 1419 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1420 CONST_BITS+PASS1_BITS+3) 1421 & RANGE_MASK]; 1422 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1423 CONST_BITS+PASS1_BITS+3) 1424 & RANGE_MASK]; 1425 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1426 CONST_BITS+PASS1_BITS+3) 1427 & RANGE_MASK]; 1428 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1429 CONST_BITS+PASS1_BITS+3) 1430 & RANGE_MASK]; 1431 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1432 CONST_BITS+PASS1_BITS+3) 1433 & RANGE_MASK]; 1434 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1435 CONST_BITS+PASS1_BITS+3) 1436 & RANGE_MASK]; 1437 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1438 CONST_BITS+PASS1_BITS+3) 1439 & RANGE_MASK]; 1440 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, 1441 CONST_BITS+PASS1_BITS+3) 1442 & RANGE_MASK]; 1443 1444 wsptr += 8; /* advance pointer to next row */ 1445 } 1446 } 1447 1448 1449 /* 1450 * Perform dequantization and inverse DCT on one block of coefficients, 1451 * producing a 12x12 output block. 1452 * 1453 * Optimized algorithm with 15 multiplications in the 1-D kernel. 1454 * cK represents sqrt(2) * cos(K*pi/24). 1455 */ 1456 1457 GLOBAL(void) 1458 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1459 JCOEFPTR coef_block, 1460 JSAMPARRAY output_buf, JDIMENSION output_col) 1461 { 1462 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1463 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 1464 INT32 z1, z2, z3, z4; 1465 JCOEFPTR inptr; 1466 ISLOW_MULT_TYPE * quantptr; 1467 int * wsptr; 1468 JSAMPROW outptr; 1469 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1470 int ctr; 1471 int workspace[8*12]; /* buffers data between passes */ 1472 SHIFT_TEMPS 1473 1474 /* Pass 1: process columns from input, store into work array. */ 1475 1476 inptr = coef_block; 1477 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1478 wsptr = workspace; 1479 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1480 /* Even part */ 1481 1482 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1483 z3 <<= CONST_BITS; 1484 /* Add fudge factor here for final descale. */ 1485 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 1486 1487 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1488 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1489 1490 tmp10 = z3 + z4; 1491 tmp11 = z3 - z4; 1492 1493 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1494 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1495 z1 <<= CONST_BITS; 1496 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1497 z2 <<= CONST_BITS; 1498 1499 tmp12 = z1 - z2; 1500 1501 tmp21 = z3 + tmp12; 1502 tmp24 = z3 - tmp12; 1503 1504 tmp12 = z4 + z2; 1505 1506 tmp20 = tmp10 + tmp12; 1507 tmp25 = tmp10 - tmp12; 1508 1509 tmp12 = z4 - z1 - z2; 1510 1511 tmp22 = tmp11 + tmp12; 1512 tmp23 = tmp11 - tmp12; 1513 1514 /* Odd part */ 1515 1516 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1517 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1518 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1519 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1520 1521 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 1522 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 1523 1524 tmp10 = z1 + z3; 1525 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1526 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1527 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1528 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1529 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1530 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1531 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1532 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1533 1534 z1 -= z4; 1535 z2 -= z3; 1536 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1537 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 1538 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1539 1540 /* Final output stage */ 1541 1542 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1543 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1544 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1545 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1546 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1547 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1548 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1549 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1550 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1551 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1552 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1553 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1554 } 1555 1556 /* Pass 2: process 12 rows from work array, store into output array. */ 1557 1558 wsptr = workspace; 1559 for (ctr = 0; ctr < 12; ctr++) { 1560 outptr = output_buf[ctr] + output_col; 1561 1562 /* Even part */ 1563 1564 /* Add fudge factor here for final descale. */ 1565 z3 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1566 z3 <<= CONST_BITS; 1567 1568 z4 = (INT32) wsptr[4]; 1569 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 1570 1571 tmp10 = z3 + z4; 1572 tmp11 = z3 - z4; 1573 1574 z1 = (INT32) wsptr[2]; 1575 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 1576 z1 <<= CONST_BITS; 1577 z2 = (INT32) wsptr[6]; 1578 z2 <<= CONST_BITS; 1579 1580 tmp12 = z1 - z2; 1581 1582 tmp21 = z3 + tmp12; 1583 tmp24 = z3 - tmp12; 1584 1585 tmp12 = z4 + z2; 1586 1587 tmp20 = tmp10 + tmp12; 1588 tmp25 = tmp10 - tmp12; 1589 1590 tmp12 = z4 - z1 - z2; 1591 1592 tmp22 = tmp11 + tmp12; 1593 tmp23 = tmp11 - tmp12; 1594 1595 /* Odd part */ 1596 1597 z1 = (INT32) wsptr[1]; 1598 z2 = (INT32) wsptr[3]; 1599 z3 = (INT32) wsptr[5]; 1600 z4 = (INT32) wsptr[7]; 1601 1602 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 1603 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 1604 1605 tmp10 = z1 + z3; 1606 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 1607 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 1608 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 1609 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 1610 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 1611 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 1612 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 1613 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 1614 1615 z1 -= z4; 1616 z2 -= z3; 1617 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 1618 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 1619 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 1620 1621 /* Final output stage */ 1622 1623 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1624 CONST_BITS+PASS1_BITS+3) 1625 & RANGE_MASK]; 1626 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1627 CONST_BITS+PASS1_BITS+3) 1628 & RANGE_MASK]; 1629 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1630 CONST_BITS+PASS1_BITS+3) 1631 & RANGE_MASK]; 1632 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1633 CONST_BITS+PASS1_BITS+3) 1634 & RANGE_MASK]; 1635 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1636 CONST_BITS+PASS1_BITS+3) 1637 & RANGE_MASK]; 1638 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1639 CONST_BITS+PASS1_BITS+3) 1640 & RANGE_MASK]; 1641 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1642 CONST_BITS+PASS1_BITS+3) 1643 & RANGE_MASK]; 1644 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1645 CONST_BITS+PASS1_BITS+3) 1646 & RANGE_MASK]; 1647 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1648 CONST_BITS+PASS1_BITS+3) 1649 & RANGE_MASK]; 1650 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1651 CONST_BITS+PASS1_BITS+3) 1652 & RANGE_MASK]; 1653 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1654 CONST_BITS+PASS1_BITS+3) 1655 & RANGE_MASK]; 1656 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1657 CONST_BITS+PASS1_BITS+3) 1658 & RANGE_MASK]; 1659 1660 wsptr += 8; /* advance pointer to next row */ 1661 } 1662 } 1663 1664 1665 /* 1666 * Perform dequantization and inverse DCT on one block of coefficients, 1667 * producing a 13x13 output block. 1668 * 1669 * Optimized algorithm with 29 multiplications in the 1-D kernel. 1670 * cK represents sqrt(2) * cos(K*pi/26). 1671 */ 1672 1673 GLOBAL(void) 1674 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1675 JCOEFPTR coef_block, 1676 JSAMPARRAY output_buf, JDIMENSION output_col) 1677 { 1678 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 1679 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 1680 INT32 z1, z2, z3, z4; 1681 JCOEFPTR inptr; 1682 ISLOW_MULT_TYPE * quantptr; 1683 int * wsptr; 1684 JSAMPROW outptr; 1685 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1686 int ctr; 1687 int workspace[8*13]; /* buffers data between passes */ 1688 SHIFT_TEMPS 1689 1690 /* Pass 1: process columns from input, store into work array. */ 1691 1692 inptr = coef_block; 1693 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1694 wsptr = workspace; 1695 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1696 /* Even part */ 1697 1698 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1699 z1 <<= CONST_BITS; 1700 /* Add fudge factor here for final descale. */ 1701 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1702 1703 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1704 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1705 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1706 1707 tmp10 = z3 + z4; 1708 tmp11 = z3 - z4; 1709 1710 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 1711 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 1712 1713 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 1714 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 1715 1716 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 1717 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 1718 1719 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 1720 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 1721 1722 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 1723 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 1724 1725 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 1726 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 1727 1728 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 1729 1730 /* Odd part */ 1731 1732 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1733 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1734 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1735 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1736 1737 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 1738 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 1739 tmp15 = z1 + z4; 1740 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 1741 tmp10 = tmp11 + tmp12 + tmp13 - 1742 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 1743 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 1744 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 1745 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 1746 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 1747 tmp11 += tmp14; 1748 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 1749 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 1750 tmp12 += tmp14; 1751 tmp13 += tmp14; 1752 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 1753 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 1754 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 1755 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 1756 tmp14 += z1; 1757 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 1758 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 1759 1760 /* Final output stage */ 1761 1762 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1763 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1764 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1765 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1766 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1767 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1768 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 1769 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 1770 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1771 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1772 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1773 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 1774 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); 1775 } 1776 1777 /* Pass 2: process 13 rows from work array, store into output array. */ 1778 1779 wsptr = workspace; 1780 for (ctr = 0; ctr < 13; ctr++) { 1781 outptr = output_buf[ctr] + output_col; 1782 1783 /* Even part */ 1784 1785 /* Add fudge factor here for final descale. */ 1786 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 1787 z1 <<= CONST_BITS; 1788 1789 z2 = (INT32) wsptr[2]; 1790 z3 = (INT32) wsptr[4]; 1791 z4 = (INT32) wsptr[6]; 1792 1793 tmp10 = z3 + z4; 1794 tmp11 = z3 - z4; 1795 1796 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 1797 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 1798 1799 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 1800 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 1801 1802 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 1803 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 1804 1805 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 1806 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 1807 1808 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 1809 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 1810 1811 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 1812 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 1813 1814 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 1815 1816 /* Odd part */ 1817 1818 z1 = (INT32) wsptr[1]; 1819 z2 = (INT32) wsptr[3]; 1820 z3 = (INT32) wsptr[5]; 1821 z4 = (INT32) wsptr[7]; 1822 1823 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 1824 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 1825 tmp15 = z1 + z4; 1826 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 1827 tmp10 = tmp11 + tmp12 + tmp13 - 1828 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 1829 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 1830 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 1831 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 1832 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 1833 tmp11 += tmp14; 1834 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 1835 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 1836 tmp12 += tmp14; 1837 tmp13 += tmp14; 1838 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 1839 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 1840 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 1841 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 1842 tmp14 += z1; 1843 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 1844 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 1845 1846 /* Final output stage */ 1847 1848 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 1849 CONST_BITS+PASS1_BITS+3) 1850 & RANGE_MASK]; 1851 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 1852 CONST_BITS+PASS1_BITS+3) 1853 & RANGE_MASK]; 1854 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 1855 CONST_BITS+PASS1_BITS+3) 1856 & RANGE_MASK]; 1857 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 1858 CONST_BITS+PASS1_BITS+3) 1859 & RANGE_MASK]; 1860 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 1861 CONST_BITS+PASS1_BITS+3) 1862 & RANGE_MASK]; 1863 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 1864 CONST_BITS+PASS1_BITS+3) 1865 & RANGE_MASK]; 1866 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 1867 CONST_BITS+PASS1_BITS+3) 1868 & RANGE_MASK]; 1869 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 1870 CONST_BITS+PASS1_BITS+3) 1871 & RANGE_MASK]; 1872 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 1873 CONST_BITS+PASS1_BITS+3) 1874 & RANGE_MASK]; 1875 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 1876 CONST_BITS+PASS1_BITS+3) 1877 & RANGE_MASK]; 1878 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 1879 CONST_BITS+PASS1_BITS+3) 1880 & RANGE_MASK]; 1881 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 1882 CONST_BITS+PASS1_BITS+3) 1883 & RANGE_MASK]; 1884 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, 1885 CONST_BITS+PASS1_BITS+3) 1886 & RANGE_MASK]; 1887 1888 wsptr += 8; /* advance pointer to next row */ 1889 } 1890 } 1891 1892 1893 /* 1894 * Perform dequantization and inverse DCT on one block of coefficients, 1895 * producing a 14x14 output block. 1896 * 1897 * Optimized algorithm with 20 multiplications in the 1-D kernel. 1898 * cK represents sqrt(2) * cos(K*pi/28). 1899 */ 1900 1901 GLOBAL(void) 1902 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 1903 JCOEFPTR coef_block, 1904 JSAMPARRAY output_buf, JDIMENSION output_col) 1905 { 1906 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 1907 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 1908 INT32 z1, z2, z3, z4; 1909 JCOEFPTR inptr; 1910 ISLOW_MULT_TYPE * quantptr; 1911 int * wsptr; 1912 JSAMPROW outptr; 1913 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 1914 int ctr; 1915 int workspace[8*14]; /* buffers data between passes */ 1916 SHIFT_TEMPS 1917 1918 /* Pass 1: process columns from input, store into work array. */ 1919 1920 inptr = coef_block; 1921 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 1922 wsptr = workspace; 1923 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 1924 /* Even part */ 1925 1926 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 1927 z1 <<= CONST_BITS; 1928 /* Add fudge factor here for final descale. */ 1929 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 1930 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 1931 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 1932 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 1933 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 1934 1935 tmp10 = z1 + z2; 1936 tmp11 = z1 + z3; 1937 tmp12 = z1 - z4; 1938 1939 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 1940 CONST_BITS-PASS1_BITS); 1941 1942 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 1943 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 1944 1945 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 1946 1947 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 1948 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 1949 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 1950 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 1951 1952 tmp20 = tmp10 + tmp13; 1953 tmp26 = tmp10 - tmp13; 1954 tmp21 = tmp11 + tmp14; 1955 tmp25 = tmp11 - tmp14; 1956 tmp22 = tmp12 + tmp15; 1957 tmp24 = tmp12 - tmp15; 1958 1959 /* Odd part */ 1960 1961 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 1962 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 1963 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 1964 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 1965 tmp13 = z4 << CONST_BITS; 1966 1967 tmp14 = z1 + z3; 1968 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 1969 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 1970 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 1971 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 1972 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 1973 z1 -= z2; 1974 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 1975 tmp16 += tmp15; 1976 z1 += z4; 1977 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 1978 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 1979 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 1980 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 1981 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 1982 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 1983 1984 tmp13 = (z1 - z3) << PASS1_BITS; 1985 1986 /* Final output stage */ 1987 1988 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 1989 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 1990 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 1991 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 1992 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 1993 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 1994 wsptr[8*3] = (int) (tmp23 + tmp13); 1995 wsptr[8*10] = (int) (tmp23 - tmp13); 1996 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 1997 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 1998 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 1999 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 2000 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 2001 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 2002 } 2003 2004 /* Pass 2: process 14 rows from work array, store into output array. */ 2005 2006 wsptr = workspace; 2007 for (ctr = 0; ctr < 14; ctr++) { 2008 outptr = output_buf[ctr] + output_col; 2009 2010 /* Even part */ 2011 2012 /* Add fudge factor here for final descale. */ 2013 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 2014 z1 <<= CONST_BITS; 2015 z4 = (INT32) wsptr[4]; 2016 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 2017 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 2018 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 2019 2020 tmp10 = z1 + z2; 2021 tmp11 = z1 + z3; 2022 tmp12 = z1 - z4; 2023 2024 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 2025 2026 z1 = (INT32) wsptr[2]; 2027 z2 = (INT32) wsptr[6]; 2028 2029 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 2030 2031 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 2032 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 2033 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 2034 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 2035 2036 tmp20 = tmp10 + tmp13; 2037 tmp26 = tmp10 - tmp13; 2038 tmp21 = tmp11 + tmp14; 2039 tmp25 = tmp11 - tmp14; 2040 tmp22 = tmp12 + tmp15; 2041 tmp24 = tmp12 - tmp15; 2042 2043 /* Odd part */ 2044 2045 z1 = (INT32) wsptr[1]; 2046 z2 = (INT32) wsptr[3]; 2047 z3 = (INT32) wsptr[5]; 2048 z4 = (INT32) wsptr[7]; 2049 z4 <<= CONST_BITS; 2050 2051 tmp14 = z1 + z3; 2052 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 2053 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 2054 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 2055 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 2056 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 2057 z1 -= z2; 2058 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 2059 tmp16 += tmp15; 2060 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 2061 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 2062 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 2063 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 2064 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 2065 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 2066 2067 tmp13 = ((z1 - z3) << CONST_BITS) + z4; 2068 2069 /* Final output stage */ 2070 2071 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 2072 CONST_BITS+PASS1_BITS+3) 2073 & RANGE_MASK]; 2074 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 2075 CONST_BITS+PASS1_BITS+3) 2076 & RANGE_MASK]; 2077 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 2078 CONST_BITS+PASS1_BITS+3) 2079 & RANGE_MASK]; 2080 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 2081 CONST_BITS+PASS1_BITS+3) 2082 & RANGE_MASK]; 2083 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 2084 CONST_BITS+PASS1_BITS+3) 2085 & RANGE_MASK]; 2086 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 2087 CONST_BITS+PASS1_BITS+3) 2088 & RANGE_MASK]; 2089 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 2090 CONST_BITS+PASS1_BITS+3) 2091 & RANGE_MASK]; 2092 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 2093 CONST_BITS+PASS1_BITS+3) 2094 & RANGE_MASK]; 2095 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 2096 CONST_BITS+PASS1_BITS+3) 2097 & RANGE_MASK]; 2098 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 2099 CONST_BITS+PASS1_BITS+3) 2100 & RANGE_MASK]; 2101 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 2102 CONST_BITS+PASS1_BITS+3) 2103 & RANGE_MASK]; 2104 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 2105 CONST_BITS+PASS1_BITS+3) 2106 & RANGE_MASK]; 2107 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 2108 CONST_BITS+PASS1_BITS+3) 2109 & RANGE_MASK]; 2110 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 2111 CONST_BITS+PASS1_BITS+3) 2112 & RANGE_MASK]; 2113 2114 wsptr += 8; /* advance pointer to next row */ 2115 } 2116 } 2117 2118 2119 /* 2120 * Perform dequantization and inverse DCT on one block of coefficients, 2121 * producing a 15x15 output block. 2122 * 2123 * Optimized algorithm with 22 multiplications in the 1-D kernel. 2124 * cK represents sqrt(2) * cos(K*pi/30). 2125 */ 2126 2127 GLOBAL(void) 2128 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2129 JCOEFPTR coef_block, 2130 JSAMPARRAY output_buf, JDIMENSION output_col) 2131 { 2132 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 2133 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 2134 INT32 z1, z2, z3, z4; 2135 JCOEFPTR inptr; 2136 ISLOW_MULT_TYPE * quantptr; 2137 int * wsptr; 2138 JSAMPROW outptr; 2139 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2140 int ctr; 2141 int workspace[8*15]; /* buffers data between passes */ 2142 SHIFT_TEMPS 2143 2144 /* Pass 1: process columns from input, store into work array. */ 2145 2146 inptr = coef_block; 2147 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2148 wsptr = workspace; 2149 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 2150 /* Even part */ 2151 2152 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2153 z1 <<= CONST_BITS; 2154 /* Add fudge factor here for final descale. */ 2155 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 2156 2157 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2158 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2159 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2160 2161 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 2162 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 2163 2164 tmp12 = z1 - tmp10; 2165 tmp13 = z1 + tmp11; 2166 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 2167 2168 z4 = z2 - z3; 2169 z3 += z2; 2170 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 2171 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 2172 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 2173 2174 tmp20 = tmp13 + tmp10 + tmp11; 2175 tmp23 = tmp12 - tmp10 + tmp11 + z2; 2176 2177 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 2178 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 2179 2180 tmp25 = tmp13 - tmp10 - tmp11; 2181 tmp26 = tmp12 + tmp10 - tmp11 - z2; 2182 2183 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 2184 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 2185 2186 tmp21 = tmp12 + tmp10 + tmp11; 2187 tmp24 = tmp13 - tmp10 + tmp11; 2188 tmp11 += tmp11; 2189 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 2190 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 2191 2192 /* Odd part */ 2193 2194 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2195 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2196 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2197 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 2198 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2199 2200 tmp13 = z2 - z4; 2201 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 2202 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 2203 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 2204 2205 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 2206 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 2207 z2 = z1 - z4; 2208 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 2209 2210 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 2211 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 2212 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 2213 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 2214 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 2215 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 2216 2217 /* Final output stage */ 2218 2219 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 2220 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 2221 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 2222 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 2223 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 2224 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 2225 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 2226 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 2227 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 2228 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 2229 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 2230 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 2231 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 2232 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 2233 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); 2234 } 2235 2236 /* Pass 2: process 15 rows from work array, store into output array. */ 2237 2238 wsptr = workspace; 2239 for (ctr = 0; ctr < 15; ctr++) { 2240 outptr = output_buf[ctr] + output_col; 2241 2242 /* Even part */ 2243 2244 /* Add fudge factor here for final descale. */ 2245 z1 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 2246 z1 <<= CONST_BITS; 2247 2248 z2 = (INT32) wsptr[2]; 2249 z3 = (INT32) wsptr[4]; 2250 z4 = (INT32) wsptr[6]; 2251 2252 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 2253 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 2254 2255 tmp12 = z1 - tmp10; 2256 tmp13 = z1 + tmp11; 2257 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 2258 2259 z4 = z2 - z3; 2260 z3 += z2; 2261 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 2262 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 2263 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 2264 2265 tmp20 = tmp13 + tmp10 + tmp11; 2266 tmp23 = tmp12 - tmp10 + tmp11 + z2; 2267 2268 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 2269 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 2270 2271 tmp25 = tmp13 - tmp10 - tmp11; 2272 tmp26 = tmp12 + tmp10 - tmp11 - z2; 2273 2274 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 2275 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 2276 2277 tmp21 = tmp12 + tmp10 + tmp11; 2278 tmp24 = tmp13 - tmp10 + tmp11; 2279 tmp11 += tmp11; 2280 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 2281 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 2282 2283 /* Odd part */ 2284 2285 z1 = (INT32) wsptr[1]; 2286 z2 = (INT32) wsptr[3]; 2287 z4 = (INT32) wsptr[5]; 2288 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 2289 z4 = (INT32) wsptr[7]; 2290 2291 tmp13 = z2 - z4; 2292 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 2293 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 2294 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 2295 2296 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 2297 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 2298 z2 = z1 - z4; 2299 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 2300 2301 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 2302 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 2303 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 2304 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 2305 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 2306 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 2307 2308 /* Final output stage */ 2309 2310 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 2311 CONST_BITS+PASS1_BITS+3) 2312 & RANGE_MASK]; 2313 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 2314 CONST_BITS+PASS1_BITS+3) 2315 & RANGE_MASK]; 2316 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 2317 CONST_BITS+PASS1_BITS+3) 2318 & RANGE_MASK]; 2319 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 2320 CONST_BITS+PASS1_BITS+3) 2321 & RANGE_MASK]; 2322 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 2323 CONST_BITS+PASS1_BITS+3) 2324 & RANGE_MASK]; 2325 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 2326 CONST_BITS+PASS1_BITS+3) 2327 & RANGE_MASK]; 2328 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 2329 CONST_BITS+PASS1_BITS+3) 2330 & RANGE_MASK]; 2331 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 2332 CONST_BITS+PASS1_BITS+3) 2333 & RANGE_MASK]; 2334 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 2335 CONST_BITS+PASS1_BITS+3) 2336 & RANGE_MASK]; 2337 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 2338 CONST_BITS+PASS1_BITS+3) 2339 & RANGE_MASK]; 2340 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 2341 CONST_BITS+PASS1_BITS+3) 2342 & RANGE_MASK]; 2343 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 2344 CONST_BITS+PASS1_BITS+3) 2345 & RANGE_MASK]; 2346 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 2347 CONST_BITS+PASS1_BITS+3) 2348 & RANGE_MASK]; 2349 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 2350 CONST_BITS+PASS1_BITS+3) 2351 & RANGE_MASK]; 2352 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, 2353 CONST_BITS+PASS1_BITS+3) 2354 & RANGE_MASK]; 2355 2356 wsptr += 8; /* advance pointer to next row */ 2357 } 2358 } 2359 2360 2361 /* 2362 * Perform dequantization and inverse DCT on one block of coefficients, 2363 * producing a 16x16 output block. 2364 * 2365 * Optimized algorithm with 28 multiplications in the 1-D kernel. 2366 * cK represents sqrt(2) * cos(K*pi/32). 2367 */ 2368 2369 GLOBAL(void) 2370 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 2371 JCOEFPTR coef_block, 2372 JSAMPARRAY output_buf, JDIMENSION output_col) 2373 { 2374 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 2375 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 2376 INT32 z1, z2, z3, z4; 2377 JCOEFPTR inptr; 2378 ISLOW_MULT_TYPE * quantptr; 2379 int * wsptr; 2380 JSAMPROW outptr; 2381 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 2382 int ctr; 2383 int workspace[8*16]; /* buffers data between passes */ 2384 SHIFT_TEMPS 2385 2386 /* Pass 1: process columns from input, store into work array. */ 2387 2388 inptr = coef_block; 2389 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 2390 wsptr = workspace; 2391 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 2392 /* Even part */ 2393 2394 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 2395 tmp0 <<= CONST_BITS; 2396 /* Add fudge factor here for final descale. */ 2397 tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); 2398 2399 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 2400 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 2401 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 2402 2403 tmp10 = tmp0 + tmp1; 2404 tmp11 = tmp0 - tmp1; 2405 tmp12 = tmp0 + tmp2; 2406 tmp13 = tmp0 - tmp2; 2407 2408 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 2409 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 2410 z3 = z1 - z2; 2411 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 2412 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 2413 2414 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 2415 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 2416 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 2417 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 2418 2419 tmp20 = tmp10 + tmp0; 2420 tmp27 = tmp10 - tmp0; 2421 tmp21 = tmp12 + tmp1; 2422 tmp26 = tmp12 - tmp1; 2423 tmp22 = tmp13 + tmp2; 2424 tmp25 = tmp13 - tmp2; 2425 tmp23 = tmp11 + tmp3; 2426 tmp24 = tmp11 - tmp3; 2427 2428 /* Odd part */ 2429 2430 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 2431 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 2432 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 2433 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 2434 2435 tmp11 = z1 + z3; 2436 2437 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 2438 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 2439 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 2440 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 2441 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 2442 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 2443 tmp0 = tmp1 + tmp2 + tmp3 - 2444 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 2445 tmp13 = tmp10 + tmp11 + tmp12 - 2446 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 2447 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 2448 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 2449 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 2450 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 2451 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 2452 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 2453 z2 += z4; 2454 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 2455 tmp1 += z1; 2456 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 2457 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 2458 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 2459 tmp12 += z2; 2460 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 2461 tmp2 += z2; 2462 tmp3 += z2; 2463 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 2464 tmp10 += z2; 2465 tmp11 += z2; 2466 2467 /* Final output stage */ 2468 2469 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 2470 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 2471 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 2472 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 2473 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 2474 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 2475 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 2476 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 2477 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 2478 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 2479 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 2480 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 2481 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 2482 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 2483 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 2484 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 2485 } 2486 2487 /* Pass 2: process 16 rows from work array, store into output array. */ 2488 2489 wsptr = workspace; 2490 for (ctr = 0; ctr < 16; ctr++) { 2491 outptr = output_buf[ctr] + output_col; 2492 2493 /* Even part */ 2494 2495 /* Add fudge factor here for final descale. */ 2496 tmp0 = (INT32) wsptr[0] + (ONE << (PASS1_BITS+2)); 2497 tmp0 <<= CONST_BITS; 2498 2499 z1 = (INT32) wsptr[4]; 2500 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 2501 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 2502 2503 tmp10 = tmp0 + tmp1; 2504 tmp11 = tmp0 - tmp1; 2505 tmp12 = tmp0 + tmp2; 2506 tmp13 = tmp0 - tmp2; 2507 2508 z1 = (INT32) wsptr[2]; 2509 z2 = (INT32) wsptr[6]; 2510 z3 = z1 - z2; 2511 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 2512 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 2513 2514 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 2515 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 2516 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 2517 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 2518 2519 tmp20 = tmp10 + tmp0; 2520 tmp27 = tmp10 - tmp0; 2521 tmp21 = tmp12 + tmp1; 2522 tmp26 = tmp12 - tmp1; 2523 tmp22 = tmp13 + tmp2; 2524 tmp25 = tmp13 - tmp2; 2525 tmp23 = tmp11 + tmp3; 2526 tmp24 = tmp11 - tmp3; 2527 2528 /* Odd part */ 2529 2530 z1 = (INT32) wsptr[1]; 2531 z2 = (INT32) wsptr[3]; 2532 z3 = (INT32) wsptr[5]; 2533 z4 = (INT32) wsptr[7]; 2534 2535 tmp11 = z1 + z3; 2536 2537 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 2538 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 2539 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 2540 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 2541 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 2542 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 2543 tmp0 = tmp1 + tmp2 + tmp3 - 2544 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 2545 tmp13 = tmp10 + tmp11 + tmp12 - 2546 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 2547 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 2548 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 2549 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 2550 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 2551 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 2552 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 2553 z2 += z4; 2554 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 2555 tmp1 += z1; 2556 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 2557 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 2558 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 2559 tmp12 += z2; 2560 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 2561 tmp2 += z2; 2562 tmp3 += z2; 2563 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 2564 tmp10 += z2; 2565 tmp11 += z2; 2566 2567 /* Final output stage */ 2568 2569 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 2570 CONST_BITS+PASS1_BITS+3) 2571 & RANGE_MASK]; 2572 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 2573 CONST_BITS+PASS1_BITS+3) 2574 & RANGE_MASK]; 2575 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 2576 CONST_BITS+PASS1_BITS+3) 2577 & RANGE_MASK]; 2578 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, 2579 CONST_BITS+PASS1_BITS+3) 2580 & RANGE_MASK]; 2581 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 2582 CONST_BITS+PASS1_BITS+3) 2583 & RANGE_MASK]; 2584 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 2585 CONST_BITS+PASS1_BITS+3) 2586 & RANGE_MASK]; 2587 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 2588 CONST_BITS+PASS1_BITS+3) 2589 & RANGE_MASK]; 2590 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 2591 CONST_BITS+PASS1_BITS+3) 2592 & RANGE_MASK]; 2593 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 2594 CONST_BITS+PASS1_BITS+3) 2595 & RANGE_MASK]; 2596 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 2597 CONST_BITS+PASS1_BITS+3) 2598 & RANGE_MASK]; 2599 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 2600 CONST_BITS+PASS1_BITS+3) 2601 & RANGE_MASK]; 2602 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 2603 CONST_BITS+PASS1_BITS+3) 2604 & RANGE_MASK]; 2605 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 2606 CONST_BITS+PASS1_BITS+3) 2607 & RANGE_MASK]; 2608 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 2609 CONST_BITS+PASS1_BITS+3) 2610 & RANGE_MASK]; 2611 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 2612 CONST_BITS+PASS1_BITS+3) 2613 & RANGE_MASK]; 2614 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 2615 CONST_BITS+PASS1_BITS+3) 2616 & RANGE_MASK]; 2617 2618 wsptr += 8; /* advance pointer to next row */ 2619 } 2620 } 2621 2622 #endif /* IDCT_SCALING_SUPPORTED */ 2623 #endif /* DCT_ISLOW_SUPPORTED */ 2624