1 /* 2 * jcdctmgr.c 3 * 4 * This file was part of the Independent JPEG Group's software: 5 * Copyright (C) 1994-1996, Thomas G. Lane. 6 * libjpeg-turbo Modifications: 7 * Copyright (C) 1999-2006, MIYASAKA Masaru. 8 * Copyright 2009 Pierre Ossman <ossman (at) cendio.se> for Cendio AB 9 * Copyright (C) 2011 D. R. Commander 10 * For conditions of distribution and use, see the accompanying README file. 11 * 12 * This file contains the forward-DCT management logic. 13 * This code selects a particular DCT implementation to be used, 14 * and it performs related housekeeping chores including coefficient 15 * quantization. 16 */ 17 18 #define JPEG_INTERNALS 19 #include "jinclude.h" 20 #include "jpeglib.h" 21 #include "jdct.h" /* Private declarations for DCT subsystem */ 22 #include "jsimddct.h" 23 24 25 /* Private subobject for this module */ 26 27 typedef JMETHOD(void, forward_DCT_method_ptr, (DCTELEM * data)); 28 typedef JMETHOD(void, float_DCT_method_ptr, (FAST_FLOAT * data)); 29 30 typedef JMETHOD(void, convsamp_method_ptr, 31 (JSAMPARRAY sample_data, JDIMENSION start_col, 32 DCTELEM * workspace)); 33 typedef JMETHOD(void, float_convsamp_method_ptr, 34 (JSAMPARRAY sample_data, JDIMENSION start_col, 35 FAST_FLOAT *workspace)); 36 37 typedef JMETHOD(void, quantize_method_ptr, 38 (JCOEFPTR coef_block, DCTELEM * divisors, 39 DCTELEM * workspace)); 40 typedef JMETHOD(void, float_quantize_method_ptr, 41 (JCOEFPTR coef_block, FAST_FLOAT * divisors, 42 FAST_FLOAT * workspace)); 43 44 METHODDEF(void) quantize (JCOEFPTR, DCTELEM *, DCTELEM *); 45 46 typedef struct { 47 struct jpeg_forward_dct pub; /* public fields */ 48 49 /* Pointer to the DCT routine actually in use */ 50 forward_DCT_method_ptr dct; 51 convsamp_method_ptr convsamp; 52 quantize_method_ptr quantize; 53 54 /* The actual post-DCT divisors --- not identical to the quant table 55 * entries, because of scaling (especially for an unnormalized DCT). 56 * Each table is given in normal array order. 57 */ 58 DCTELEM * divisors[NUM_QUANT_TBLS]; 59 60 /* work area for FDCT subroutine */ 61 DCTELEM * workspace; 62 63 #ifdef DCT_FLOAT_SUPPORTED 64 /* Same as above for the floating-point case. */ 65 float_DCT_method_ptr float_dct; 66 float_convsamp_method_ptr float_convsamp; 67 float_quantize_method_ptr float_quantize; 68 FAST_FLOAT * float_divisors[NUM_QUANT_TBLS]; 69 FAST_FLOAT * float_workspace; 70 #endif 71 } my_fdct_controller; 72 73 typedef my_fdct_controller * my_fdct_ptr; 74 75 76 /* 77 * Find the highest bit in an integer through binary search. 78 */ 79 LOCAL(int) 80 flss (UINT16 val) 81 { 82 int bit; 83 84 bit = 16; 85 86 if (!val) 87 return 0; 88 89 if (!(val & 0xff00)) { 90 bit -= 8; 91 val <<= 8; 92 } 93 if (!(val & 0xf000)) { 94 bit -= 4; 95 val <<= 4; 96 } 97 if (!(val & 0xc000)) { 98 bit -= 2; 99 val <<= 2; 100 } 101 if (!(val & 0x8000)) { 102 bit -= 1; 103 val <<= 1; 104 } 105 106 return bit; 107 } 108 109 /* 110 * Compute values to do a division using reciprocal. 111 * 112 * This implementation is based on an algorithm described in 113 * "How to optimize for the Pentium family of microprocessors" 114 * (http://www.agner.org/assem/). 115 * More information about the basic algorithm can be found in 116 * the paper "Integer Division Using Reciprocals" by Robert Alverson. 117 * 118 * The basic idea is to replace x/d by x * d^-1. In order to store 119 * d^-1 with enough precision we shift it left a few places. It turns 120 * out that this algoright gives just enough precision, and also fits 121 * into DCTELEM: 122 * 123 * b = (the number of significant bits in divisor) - 1 124 * r = (word size) + b 125 * f = 2^r / divisor 126 * 127 * f will not be an integer for most cases, so we need to compensate 128 * for the rounding error introduced: 129 * 130 * no fractional part: 131 * 132 * result = input >> r 133 * 134 * fractional part of f < 0.5: 135 * 136 * round f down to nearest integer 137 * result = ((input + 1) * f) >> r 138 * 139 * fractional part of f > 0.5: 140 * 141 * round f up to nearest integer 142 * result = (input * f) >> r 143 * 144 * This is the original algorithm that gives truncated results. But we 145 * want properly rounded results, so we replace "input" with 146 * "input + divisor/2". 147 * 148 * In order to allow SIMD implementations we also tweak the values to 149 * allow the same calculation to be made at all times: 150 * 151 * dctbl[0] = f rounded to nearest integer 152 * dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5) 153 * dctbl[2] = 1 << ((word size) * 2 - r) 154 * dctbl[3] = r - (word size) 155 * 156 * dctbl[2] is for stupid instruction sets where the shift operation 157 * isn't member wise (e.g. MMX). 158 * 159 * The reason dctbl[2] and dctbl[3] reduce the shift with (word size) 160 * is that most SIMD implementations have a "multiply and store top 161 * half" operation. 162 * 163 * Lastly, we store each of the values in their own table instead 164 * of in a consecutive manner, yet again in order to allow SIMD 165 * routines. 166 */ 167 LOCAL(int) 168 compute_reciprocal (UINT16 divisor, DCTELEM * dtbl) 169 { 170 UDCTELEM2 fq, fr; 171 UDCTELEM c; 172 int b, r; 173 174 b = flss(divisor) - 1; 175 r = sizeof(DCTELEM) * 8 + b; 176 177 fq = ((UDCTELEM2)1 << r) / divisor; 178 fr = ((UDCTELEM2)1 << r) % divisor; 179 180 c = divisor / 2; /* for rounding */ 181 182 if (fr == 0) { /* divisor is power of two */ 183 /* fq will be one bit too large to fit in DCTELEM, so adjust */ 184 fq >>= 1; 185 r--; 186 } else if (fr <= (divisor / 2U)) { /* fractional part is < 0.5 */ 187 c++; 188 } else { /* fractional part is > 0.5 */ 189 fq++; 190 } 191 192 dtbl[DCTSIZE2 * 0] = (DCTELEM) fq; /* reciprocal */ 193 dtbl[DCTSIZE2 * 1] = (DCTELEM) c; /* correction + roundfactor */ 194 dtbl[DCTSIZE2 * 2] = (DCTELEM) (1 << (sizeof(DCTELEM)*8*2 - r)); /* scale */ 195 dtbl[DCTSIZE2 * 3] = (DCTELEM) r - sizeof(DCTELEM)*8; /* shift */ 196 197 if(r <= 16) return 0; 198 else return 1; 199 } 200 201 /* 202 * Initialize for a processing pass. 203 * Verify that all referenced Q-tables are present, and set up 204 * the divisor table for each one. 205 * In the current implementation, DCT of all components is done during 206 * the first pass, even if only some components will be output in the 207 * first scan. Hence all components should be examined here. 208 */ 209 210 METHODDEF(void) 211 start_pass_fdctmgr (j_compress_ptr cinfo) 212 { 213 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 214 int ci, qtblno, i; 215 jpeg_component_info *compptr; 216 JQUANT_TBL * qtbl; 217 DCTELEM * dtbl; 218 219 for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components; 220 ci++, compptr++) { 221 qtblno = compptr->quant_tbl_no; 222 /* Make sure specified quantization table is present */ 223 if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS || 224 cinfo->quant_tbl_ptrs[qtblno] == NULL) 225 ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno); 226 qtbl = cinfo->quant_tbl_ptrs[qtblno]; 227 /* Compute divisors for this quant table */ 228 /* We may do this more than once for same table, but it's not a big deal */ 229 switch (cinfo->dct_method) { 230 #ifdef DCT_ISLOW_SUPPORTED 231 case JDCT_ISLOW: 232 /* For LL&M IDCT method, divisors are equal to raw quantization 233 * coefficients multiplied by 8 (to counteract scaling). 234 */ 235 if (fdct->divisors[qtblno] == NULL) { 236 fdct->divisors[qtblno] = (DCTELEM *) 237 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 238 (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); 239 } 240 dtbl = fdct->divisors[qtblno]; 241 for (i = 0; i < DCTSIZE2; i++) { 242 if(!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) 243 && fdct->quantize == jsimd_quantize) 244 fdct->quantize = quantize; 245 } 246 break; 247 #endif 248 #ifdef DCT_IFAST_SUPPORTED 249 case JDCT_IFAST: 250 { 251 /* For AA&N IDCT method, divisors are equal to quantization 252 * coefficients scaled by scalefactor[row]*scalefactor[col], where 253 * scalefactor[0] = 1 254 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 255 * We apply a further scale factor of 8. 256 */ 257 #define CONST_BITS 14 258 static const INT16 aanscales[DCTSIZE2] = { 259 /* precomputed values scaled up by 14 bits */ 260 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 261 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270, 262 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906, 263 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 264 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 265 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, 266 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, 267 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 268 }; 269 SHIFT_TEMPS 270 271 if (fdct->divisors[qtblno] == NULL) { 272 fdct->divisors[qtblno] = (DCTELEM *) 273 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 274 (DCTSIZE2 * 4) * SIZEOF(DCTELEM)); 275 } 276 dtbl = fdct->divisors[qtblno]; 277 for (i = 0; i < DCTSIZE2; i++) { 278 if(!compute_reciprocal( 279 DESCALE(MULTIPLY16V16((INT32) qtbl->quantval[i], 280 (INT32) aanscales[i]), 281 CONST_BITS-3), &dtbl[i]) 282 && fdct->quantize == jsimd_quantize) 283 fdct->quantize = quantize; 284 } 285 } 286 break; 287 #endif 288 #ifdef DCT_FLOAT_SUPPORTED 289 case JDCT_FLOAT: 290 { 291 /* For float AA&N IDCT method, divisors are equal to quantization 292 * coefficients scaled by scalefactor[row]*scalefactor[col], where 293 * scalefactor[0] = 1 294 * scalefactor[k] = cos(k*PI/16) * sqrt(2) for k=1..7 295 * We apply a further scale factor of 8. 296 * What's actually stored is 1/divisor so that the inner loop can 297 * use a multiplication rather than a division. 298 */ 299 FAST_FLOAT * fdtbl; 300 int row, col; 301 static const double aanscalefactor[DCTSIZE] = { 302 1.0, 1.387039845, 1.306562965, 1.175875602, 303 1.0, 0.785694958, 0.541196100, 0.275899379 304 }; 305 306 if (fdct->float_divisors[qtblno] == NULL) { 307 fdct->float_divisors[qtblno] = (FAST_FLOAT *) 308 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 309 DCTSIZE2 * SIZEOF(FAST_FLOAT)); 310 } 311 fdtbl = fdct->float_divisors[qtblno]; 312 i = 0; 313 for (row = 0; row < DCTSIZE; row++) { 314 for (col = 0; col < DCTSIZE; col++) { 315 fdtbl[i] = (FAST_FLOAT) 316 (1.0 / (((double) qtbl->quantval[i] * 317 aanscalefactor[row] * aanscalefactor[col] * 8.0))); 318 i++; 319 } 320 } 321 } 322 break; 323 #endif 324 default: 325 ERREXIT(cinfo, JERR_NOT_COMPILED); 326 break; 327 } 328 } 329 } 330 331 332 /* 333 * Load data into workspace, applying unsigned->signed conversion. 334 */ 335 336 METHODDEF(void) 337 convsamp (JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM * workspace) 338 { 339 register DCTELEM *workspaceptr; 340 register JSAMPROW elemptr; 341 register int elemr; 342 343 workspaceptr = workspace; 344 for (elemr = 0; elemr < DCTSIZE; elemr++) { 345 elemptr = sample_data[elemr] + start_col; 346 347 #if DCTSIZE == 8 /* unroll the inner loop */ 348 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 349 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 350 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 351 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 352 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 353 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 354 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 355 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 356 #else 357 { 358 register int elemc; 359 for (elemc = DCTSIZE; elemc > 0; elemc--) 360 *workspaceptr++ = GETJSAMPLE(*elemptr++) - CENTERJSAMPLE; 361 } 362 #endif 363 } 364 } 365 366 367 /* 368 * Quantize/descale the coefficients, and store into coef_blocks[]. 369 */ 370 371 METHODDEF(void) 372 quantize (JCOEFPTR coef_block, DCTELEM * divisors, DCTELEM * workspace) 373 { 374 int i; 375 DCTELEM temp; 376 UDCTELEM recip, corr, shift; 377 UDCTELEM2 product; 378 JCOEFPTR output_ptr = coef_block; 379 380 for (i = 0; i < DCTSIZE2; i++) { 381 temp = workspace[i]; 382 recip = divisors[i + DCTSIZE2 * 0]; 383 corr = divisors[i + DCTSIZE2 * 1]; 384 shift = divisors[i + DCTSIZE2 * 3]; 385 386 if (temp < 0) { 387 temp = -temp; 388 product = (UDCTELEM2)(temp + corr) * recip; 389 product >>= shift + sizeof(DCTELEM)*8; 390 temp = product; 391 temp = -temp; 392 } else { 393 product = (UDCTELEM2)(temp + corr) * recip; 394 product >>= shift + sizeof(DCTELEM)*8; 395 temp = product; 396 } 397 398 output_ptr[i] = (JCOEF) temp; 399 } 400 } 401 402 403 /* 404 * Perform forward DCT on one or more blocks of a component. 405 * 406 * The input samples are taken from the sample_data[] array starting at 407 * position start_row/start_col, and moving to the right for any additional 408 * blocks. The quantized coefficients are returned in coef_blocks[]. 409 */ 410 411 METHODDEF(void) 412 forward_DCT (j_compress_ptr cinfo, jpeg_component_info * compptr, 413 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 414 JDIMENSION start_row, JDIMENSION start_col, 415 JDIMENSION num_blocks) 416 /* This version is used for integer DCT implementations. */ 417 { 418 /* This routine is heavily used, so it's worth coding it tightly. */ 419 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 420 DCTELEM * divisors = fdct->divisors[compptr->quant_tbl_no]; 421 DCTELEM * workspace; 422 JDIMENSION bi; 423 424 /* Make sure the compiler doesn't look up these every pass */ 425 forward_DCT_method_ptr do_dct = fdct->dct; 426 convsamp_method_ptr do_convsamp = fdct->convsamp; 427 quantize_method_ptr do_quantize = fdct->quantize; 428 workspace = fdct->workspace; 429 430 sample_data += start_row; /* fold in the vertical offset once */ 431 432 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 433 /* Load data into workspace, applying unsigned->signed conversion */ 434 (*do_convsamp) (sample_data, start_col, workspace); 435 436 /* Perform the DCT */ 437 (*do_dct) (workspace); 438 439 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 440 (*do_quantize) (coef_blocks[bi], divisors, workspace); 441 } 442 } 443 444 445 #ifdef DCT_FLOAT_SUPPORTED 446 447 448 METHODDEF(void) 449 convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col, FAST_FLOAT * workspace) 450 { 451 register FAST_FLOAT *workspaceptr; 452 register JSAMPROW elemptr; 453 register int elemr; 454 455 workspaceptr = workspace; 456 for (elemr = 0; elemr < DCTSIZE; elemr++) { 457 elemptr = sample_data[elemr] + start_col; 458 #if DCTSIZE == 8 /* unroll the inner loop */ 459 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 460 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 461 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 462 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 463 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 464 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 465 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 466 *workspaceptr++ = (FAST_FLOAT)(GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 467 #else 468 { 469 register int elemc; 470 for (elemc = DCTSIZE; elemc > 0; elemc--) 471 *workspaceptr++ = (FAST_FLOAT) 472 (GETJSAMPLE(*elemptr++) - CENTERJSAMPLE); 473 } 474 #endif 475 } 476 } 477 478 479 METHODDEF(void) 480 quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors, FAST_FLOAT * workspace) 481 { 482 register FAST_FLOAT temp; 483 register int i; 484 register JCOEFPTR output_ptr = coef_block; 485 486 for (i = 0; i < DCTSIZE2; i++) { 487 /* Apply the quantization and scaling factor */ 488 temp = workspace[i] * divisors[i]; 489 490 /* Round to nearest integer. 491 * Since C does not specify the direction of rounding for negative 492 * quotients, we have to force the dividend positive for portability. 493 * The maximum coefficient size is +-16K (for 12-bit data), so this 494 * code should work for either 16-bit or 32-bit ints. 495 */ 496 output_ptr[i] = (JCOEF) ((int) (temp + (FAST_FLOAT) 16384.5) - 16384); 497 } 498 } 499 500 501 METHODDEF(void) 502 forward_DCT_float (j_compress_ptr cinfo, jpeg_component_info * compptr, 503 JSAMPARRAY sample_data, JBLOCKROW coef_blocks, 504 JDIMENSION start_row, JDIMENSION start_col, 505 JDIMENSION num_blocks) 506 /* This version is used for floating-point DCT implementations. */ 507 { 508 /* This routine is heavily used, so it's worth coding it tightly. */ 509 my_fdct_ptr fdct = (my_fdct_ptr) cinfo->fdct; 510 FAST_FLOAT * divisors = fdct->float_divisors[compptr->quant_tbl_no]; 511 FAST_FLOAT * workspace; 512 JDIMENSION bi; 513 514 515 /* Make sure the compiler doesn't look up these every pass */ 516 float_DCT_method_ptr do_dct = fdct->float_dct; 517 float_convsamp_method_ptr do_convsamp = fdct->float_convsamp; 518 float_quantize_method_ptr do_quantize = fdct->float_quantize; 519 workspace = fdct->float_workspace; 520 521 sample_data += start_row; /* fold in the vertical offset once */ 522 523 for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) { 524 /* Load data into workspace, applying unsigned->signed conversion */ 525 (*do_convsamp) (sample_data, start_col, workspace); 526 527 /* Perform the DCT */ 528 (*do_dct) (workspace); 529 530 /* Quantize/descale the coefficients, and store into coef_blocks[] */ 531 (*do_quantize) (coef_blocks[bi], divisors, workspace); 532 } 533 } 534 535 #endif /* DCT_FLOAT_SUPPORTED */ 536 537 538 /* 539 * Initialize FDCT manager. 540 */ 541 542 GLOBAL(void) 543 jinit_forward_dct (j_compress_ptr cinfo) 544 { 545 my_fdct_ptr fdct; 546 int i; 547 548 fdct = (my_fdct_ptr) 549 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 550 SIZEOF(my_fdct_controller)); 551 cinfo->fdct = (struct jpeg_forward_dct *) fdct; 552 fdct->pub.start_pass = start_pass_fdctmgr; 553 554 /* First determine the DCT... */ 555 switch (cinfo->dct_method) { 556 #ifdef DCT_ISLOW_SUPPORTED 557 case JDCT_ISLOW: 558 fdct->pub.forward_DCT = forward_DCT; 559 if (jsimd_can_fdct_islow()) 560 fdct->dct = jsimd_fdct_islow; 561 else 562 fdct->dct = jpeg_fdct_islow; 563 break; 564 #endif 565 #ifdef DCT_IFAST_SUPPORTED 566 case JDCT_IFAST: 567 fdct->pub.forward_DCT = forward_DCT; 568 if (jsimd_can_fdct_ifast()) 569 fdct->dct = jsimd_fdct_ifast; 570 else 571 fdct->dct = jpeg_fdct_ifast; 572 break; 573 #endif 574 #ifdef DCT_FLOAT_SUPPORTED 575 case JDCT_FLOAT: 576 fdct->pub.forward_DCT = forward_DCT_float; 577 if (jsimd_can_fdct_float()) 578 fdct->float_dct = jsimd_fdct_float; 579 else 580 fdct->float_dct = jpeg_fdct_float; 581 break; 582 #endif 583 default: 584 ERREXIT(cinfo, JERR_NOT_COMPILED); 585 break; 586 } 587 588 /* ...then the supporting stages. */ 589 switch (cinfo->dct_method) { 590 #ifdef DCT_ISLOW_SUPPORTED 591 case JDCT_ISLOW: 592 #endif 593 #ifdef DCT_IFAST_SUPPORTED 594 case JDCT_IFAST: 595 #endif 596 #if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED) 597 if (jsimd_can_convsamp()) 598 fdct->convsamp = jsimd_convsamp; 599 else 600 fdct->convsamp = convsamp; 601 if (jsimd_can_quantize()) 602 fdct->quantize = jsimd_quantize; 603 else 604 fdct->quantize = quantize; 605 break; 606 #endif 607 #ifdef DCT_FLOAT_SUPPORTED 608 case JDCT_FLOAT: 609 if (jsimd_can_convsamp_float()) 610 fdct->float_convsamp = jsimd_convsamp_float; 611 else 612 fdct->float_convsamp = convsamp_float; 613 if (jsimd_can_quantize_float()) 614 fdct->float_quantize = jsimd_quantize_float; 615 else 616 fdct->float_quantize = quantize_float; 617 break; 618 #endif 619 default: 620 ERREXIT(cinfo, JERR_NOT_COMPILED); 621 break; 622 } 623 624 /* Allocate workspace memory */ 625 #ifdef DCT_FLOAT_SUPPORTED 626 if (cinfo->dct_method == JDCT_FLOAT) 627 fdct->float_workspace = (FAST_FLOAT *) 628 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 629 SIZEOF(FAST_FLOAT) * DCTSIZE2); 630 else 631 #endif 632 fdct->workspace = (DCTELEM *) 633 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 634 SIZEOF(DCTELEM) * DCTSIZE2); 635 636 /* Mark divisor tables unallocated */ 637 for (i = 0; i < NUM_QUANT_TBLS; i++) { 638 fdct->divisors[i] = NULL; 639 #ifdef DCT_FLOAT_SUPPORTED 640 fdct->float_divisors[i] = NULL; 641 #endif 642 } 643 } 644