1 /* 2 * jcphuff.c 3 * 4 * This file was part of the Independent JPEG Group's software: 5 * Copyright (C) 1995-1997, Thomas G. Lane. 6 * libjpeg-turbo Modifications: 7 * Copyright (C) 2011, 2015, 2018, D. R. Commander. 8 * Copyright (C) 2016, 2018, Matthieu Darbois. 9 * For conditions of distribution and use, see the accompanying README.ijg 10 * file. 11 * 12 * This file contains Huffman entropy encoding routines for progressive JPEG. 13 * 14 * We do not support output suspension in this module, since the library 15 * currently does not allow multiple-scan files to be written with output 16 * suspension. 17 */ 18 19 #define JPEG_INTERNALS 20 #include "jinclude.h" 21 #include "jpeglib.h" 22 #include "jsimd.h" 23 #include "jconfigint.h" 24 #include <limits.h> 25 26 #ifdef HAVE_INTRIN_H 27 #include <intrin.h> 28 #ifdef _MSC_VER 29 #ifdef HAVE_BITSCANFORWARD64 30 #pragma intrinsic(_BitScanForward64) 31 #endif 32 #ifdef HAVE_BITSCANFORWARD 33 #pragma intrinsic(_BitScanForward) 34 #endif 35 #endif 36 #endif 37 38 #ifdef C_PROGRESSIVE_SUPPORTED 39 40 /* 41 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be 42 * used for bit counting rather than the lookup table. This will reduce the 43 * memory footprint by 64k, which is important for some mobile applications 44 * that create many isolated instances of libjpeg-turbo (web browsers, for 45 * instance.) This may improve performance on some mobile platforms as well. 46 * This feature is enabled by default only on ARM processors, because some x86 47 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be 48 * shown to have a significant performance impact even on the x86 chips that 49 * have a fast implementation of it. When building for ARMv6, you can 50 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler 51 * flags (this defines __thumb__). 52 */ 53 54 /* NOTE: Both GCC and Clang define __GNUC__ */ 55 #if defined __GNUC__ && (defined __arm__ || defined __aarch64__) 56 #if !defined __thumb__ || defined __thumb2__ 57 #define USE_CLZ_INTRINSIC 58 #endif 59 #endif 60 61 #ifdef USE_CLZ_INTRINSIC 62 #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) 63 #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) 64 #else 65 #include "jpeg_nbits_table.h" 66 #define JPEG_NBITS(x) (jpeg_nbits_table[x]) 67 #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) 68 #endif 69 70 71 /* Expanded entropy encoder object for progressive Huffman encoding. */ 72 73 typedef struct { 74 struct jpeg_entropy_encoder pub; /* public fields */ 75 76 /* Pointer to routine to prepare data for encode_mcu_AC_first() */ 77 void (*AC_first_prepare) (const JCOEF *block, 78 const int *jpeg_natural_order_start, int Sl, 79 int Al, JCOEF *values, size_t *zerobits); 80 /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ 81 int (*AC_refine_prepare) (const JCOEF *block, 82 const int *jpeg_natural_order_start, int Sl, 83 int Al, JCOEF *absvalues, size_t *bits); 84 85 /* Mode flag: TRUE for optimization, FALSE for actual data output */ 86 boolean gather_statistics; 87 88 /* Bit-level coding status. 89 * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. 90 */ 91 JOCTET *next_output_byte; /* => next byte to write in buffer */ 92 size_t free_in_buffer; /* # of byte spaces remaining in buffer */ 93 size_t put_buffer; /* current bit-accumulation buffer */ 94 int put_bits; /* # of bits now in it */ 95 j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ 96 97 /* Coding status for DC components */ 98 int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ 99 100 /* Coding status for AC components */ 101 int ac_tbl_no; /* the table number of the single component */ 102 unsigned int EOBRUN; /* run length of EOBs */ 103 unsigned int BE; /* # of buffered correction bits before MCU */ 104 char *bit_buffer; /* buffer for correction bits (1 per char) */ 105 /* packing correction bits tightly would save some space but cost time... */ 106 107 unsigned int restarts_to_go; /* MCUs left in this restart interval */ 108 int next_restart_num; /* next restart number to write (0-7) */ 109 110 /* Pointers to derived tables (these workspaces have image lifespan). 111 * Since any one scan codes only DC or only AC, we only need one set 112 * of tables, not one for DC and one for AC. 113 */ 114 c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; 115 116 /* Statistics tables for optimization; again, one set is enough */ 117 long *count_ptrs[NUM_HUFF_TBLS]; 118 } phuff_entropy_encoder; 119 120 typedef phuff_entropy_encoder *phuff_entropy_ptr; 121 122 /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit 123 * buffer can hold. Larger sizes may slightly improve compression, but 124 * 1000 is already well into the realm of overkill. 125 * The minimum safe size is 64 bits. 126 */ 127 128 #define MAX_CORR_BITS 1000 /* Max # of correction bits I can buffer */ 129 130 /* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG. 131 * We assume that int right shift is unsigned if JLONG right shift is, 132 * which should be safe. 133 */ 134 135 #ifdef RIGHT_SHIFT_IS_UNSIGNED 136 #define ISHIFT_TEMPS int ishift_temp; 137 #define IRIGHT_SHIFT(x, shft) \ 138 ((ishift_temp = (x)) < 0 ? \ 139 (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \ 140 (ishift_temp >> (shft))) 141 #else 142 #define ISHIFT_TEMPS 143 #define IRIGHT_SHIFT(x, shft) ((x) >> (shft)) 144 #endif 145 146 #define PAD(v, p) ((v + (p) - 1) & (~((p) - 1))) 147 148 /* Forward declarations */ 149 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, 150 JBLOCKROW *MCU_data); 151 METHODDEF(void) encode_mcu_AC_first_prepare 152 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, 153 JCOEF *values, size_t *zerobits); 154 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, 155 JBLOCKROW *MCU_data); 156 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, 157 JBLOCKROW *MCU_data); 158 METHODDEF(int) encode_mcu_AC_refine_prepare 159 (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, 160 JCOEF *absvalues, size_t *bits); 161 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, 162 JBLOCKROW *MCU_data); 163 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); 164 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo); 165 166 167 /* Count bit loop zeroes */ 168 INLINE 169 METHODDEF(int) 170 count_zeroes(size_t *x) 171 { 172 int result; 173 #if defined(HAVE_BUILTIN_CTZL) 174 result = __builtin_ctzl(*x); 175 *x >>= result; 176 #elif defined(HAVE_BITSCANFORWARD64) 177 _BitScanForward64(&result, *x); 178 *x >>= result; 179 #elif defined(HAVE_BITSCANFORWARD) 180 _BitScanForward(&result, *x); 181 *x >>= result; 182 #else 183 result = 0; 184 while ((*x & 1) == 0) { 185 ++result; 186 *x >>= 1; 187 } 188 #endif 189 return result; 190 } 191 192 193 /* 194 * Initialize for a Huffman-compressed scan using progressive JPEG. 195 */ 196 197 METHODDEF(void) 198 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics) 199 { 200 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 201 boolean is_DC_band; 202 int ci, tbl; 203 jpeg_component_info *compptr; 204 205 entropy->cinfo = cinfo; 206 entropy->gather_statistics = gather_statistics; 207 208 is_DC_band = (cinfo->Ss == 0); 209 210 /* We assume jcmaster.c already validated the scan parameters. */ 211 212 /* Select execution routines */ 213 if (cinfo->Ah == 0) { 214 if (is_DC_band) 215 entropy->pub.encode_mcu = encode_mcu_DC_first; 216 else 217 entropy->pub.encode_mcu = encode_mcu_AC_first; 218 if (jsimd_can_encode_mcu_AC_first_prepare()) 219 entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare; 220 else 221 entropy->AC_first_prepare = encode_mcu_AC_first_prepare; 222 } else { 223 if (is_DC_band) 224 entropy->pub.encode_mcu = encode_mcu_DC_refine; 225 else { 226 entropy->pub.encode_mcu = encode_mcu_AC_refine; 227 if (jsimd_can_encode_mcu_AC_refine_prepare()) 228 entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare; 229 else 230 entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare; 231 /* AC refinement needs a correction bit buffer */ 232 if (entropy->bit_buffer == NULL) 233 entropy->bit_buffer = (char *) 234 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 235 MAX_CORR_BITS * sizeof(char)); 236 } 237 } 238 if (gather_statistics) 239 entropy->pub.finish_pass = finish_pass_gather_phuff; 240 else 241 entropy->pub.finish_pass = finish_pass_phuff; 242 243 /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1 244 * for AC coefficients. 245 */ 246 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { 247 compptr = cinfo->cur_comp_info[ci]; 248 /* Initialize DC predictions to 0 */ 249 entropy->last_dc_val[ci] = 0; 250 /* Get table index */ 251 if (is_DC_band) { 252 if (cinfo->Ah != 0) /* DC refinement needs no table */ 253 continue; 254 tbl = compptr->dc_tbl_no; 255 } else { 256 entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; 257 } 258 if (gather_statistics) { 259 /* Check for invalid table index */ 260 /* (make_c_derived_tbl does this in the other path) */ 261 if (tbl < 0 || tbl >= NUM_HUFF_TBLS) 262 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); 263 /* Allocate and zero the statistics tables */ 264 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ 265 if (entropy->count_ptrs[tbl] == NULL) 266 entropy->count_ptrs[tbl] = (long *) 267 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 268 257 * sizeof(long)); 269 MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); 270 } else { 271 /* Compute derived values for Huffman table */ 272 /* We may do this more than once for a table, but it's not expensive */ 273 jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, 274 &entropy->derived_tbls[tbl]); 275 } 276 } 277 278 /* Initialize AC stuff */ 279 entropy->EOBRUN = 0; 280 entropy->BE = 0; 281 282 /* Initialize bit buffer to empty */ 283 entropy->put_buffer = 0; 284 entropy->put_bits = 0; 285 286 /* Initialize restart stuff */ 287 entropy->restarts_to_go = cinfo->restart_interval; 288 entropy->next_restart_num = 0; 289 } 290 291 292 /* Outputting bytes to the file. 293 * NB: these must be called only when actually outputting, 294 * that is, entropy->gather_statistics == FALSE. 295 */ 296 297 /* Emit a byte */ 298 #define emit_byte(entropy, val) { \ 299 *(entropy)->next_output_byte++ = (JOCTET)(val); \ 300 if (--(entropy)->free_in_buffer == 0) \ 301 dump_buffer(entropy); \ 302 } 303 304 305 LOCAL(void) 306 dump_buffer(phuff_entropy_ptr entropy) 307 /* Empty the output buffer; we do not support suspension in this module. */ 308 { 309 struct jpeg_destination_mgr *dest = entropy->cinfo->dest; 310 311 if (!(*dest->empty_output_buffer) (entropy->cinfo)) 312 ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); 313 /* After a successful buffer dump, must reset buffer pointers */ 314 entropy->next_output_byte = dest->next_output_byte; 315 entropy->free_in_buffer = dest->free_in_buffer; 316 } 317 318 319 /* Outputting bits to the file */ 320 321 /* Only the right 24 bits of put_buffer are used; the valid bits are 322 * left-justified in this part. At most 16 bits can be passed to emit_bits 323 * in one call, and we never retain more than 7 bits in put_buffer 324 * between calls, so 24 bits are sufficient. 325 */ 326 327 LOCAL(void) 328 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size) 329 /* Emit some bits, unless we are in gather mode */ 330 { 331 /* This routine is heavily used, so it's worth coding tightly. */ 332 register size_t put_buffer = (size_t)code; 333 register int put_bits = entropy->put_bits; 334 335 /* if size is 0, caller used an invalid Huffman table entry */ 336 if (size == 0) 337 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); 338 339 if (entropy->gather_statistics) 340 return; /* do nothing if we're only getting stats */ 341 342 put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */ 343 344 put_bits += size; /* new number of bits in buffer */ 345 346 put_buffer <<= 24 - put_bits; /* align incoming bits */ 347 348 put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */ 349 350 while (put_bits >= 8) { 351 int c = (int)((put_buffer >> 16) & 0xFF); 352 353 emit_byte(entropy, c); 354 if (c == 0xFF) { /* need to stuff a zero byte? */ 355 emit_byte(entropy, 0); 356 } 357 put_buffer <<= 8; 358 put_bits -= 8; 359 } 360 361 entropy->put_buffer = put_buffer; /* update variables */ 362 entropy->put_bits = put_bits; 363 } 364 365 366 LOCAL(void) 367 flush_bits(phuff_entropy_ptr entropy) 368 { 369 emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ 370 entropy->put_buffer = 0; /* and reset bit-buffer to empty */ 371 entropy->put_bits = 0; 372 } 373 374 375 /* 376 * Emit (or just count) a Huffman symbol. 377 */ 378 379 LOCAL(void) 380 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol) 381 { 382 if (entropy->gather_statistics) 383 entropy->count_ptrs[tbl_no][symbol]++; 384 else { 385 c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; 386 emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); 387 } 388 } 389 390 391 /* 392 * Emit bits from a correction bit buffer. 393 */ 394 395 LOCAL(void) 396 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart, 397 unsigned int nbits) 398 { 399 if (entropy->gather_statistics) 400 return; /* no real work */ 401 402 while (nbits > 0) { 403 emit_bits(entropy, (unsigned int)(*bufstart), 1); 404 bufstart++; 405 nbits--; 406 } 407 } 408 409 410 /* 411 * Emit any pending EOBRUN symbol. 412 */ 413 414 LOCAL(void) 415 emit_eobrun(phuff_entropy_ptr entropy) 416 { 417 register int temp, nbits; 418 419 if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ 420 temp = entropy->EOBRUN; 421 nbits = JPEG_NBITS_NONZERO(temp) - 1; 422 /* safety check: shouldn't happen given limited correction-bit buffer */ 423 if (nbits > 14) 424 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); 425 426 emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); 427 if (nbits) 428 emit_bits(entropy, entropy->EOBRUN, nbits); 429 430 entropy->EOBRUN = 0; 431 432 /* Emit any buffered correction bits */ 433 emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); 434 entropy->BE = 0; 435 } 436 } 437 438 439 /* 440 * Emit a restart marker & resynchronize predictions. 441 */ 442 443 LOCAL(void) 444 emit_restart(phuff_entropy_ptr entropy, int restart_num) 445 { 446 int ci; 447 448 emit_eobrun(entropy); 449 450 if (!entropy->gather_statistics) { 451 flush_bits(entropy); 452 emit_byte(entropy, 0xFF); 453 emit_byte(entropy, JPEG_RST0 + restart_num); 454 } 455 456 if (entropy->cinfo->Ss == 0) { 457 /* Re-initialize DC predictions to 0 */ 458 for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) 459 entropy->last_dc_val[ci] = 0; 460 } else { 461 /* Re-initialize all AC-related fields to 0 */ 462 entropy->EOBRUN = 0; 463 entropy->BE = 0; 464 } 465 } 466 467 468 /* 469 * MCU encoding for DC initial scan (either spectral selection, 470 * or first pass of successive approximation). 471 */ 472 473 METHODDEF(boolean) 474 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) 475 { 476 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 477 register int temp, temp2, temp3; 478 register int nbits; 479 int blkn, ci; 480 int Al = cinfo->Al; 481 JBLOCKROW block; 482 jpeg_component_info *compptr; 483 ISHIFT_TEMPS 484 485 entropy->next_output_byte = cinfo->dest->next_output_byte; 486 entropy->free_in_buffer = cinfo->dest->free_in_buffer; 487 488 /* Emit restart marker if needed */ 489 if (cinfo->restart_interval) 490 if (entropy->restarts_to_go == 0) 491 emit_restart(entropy, entropy->next_restart_num); 492 493 /* Encode the MCU data blocks */ 494 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { 495 block = MCU_data[blkn]; 496 ci = cinfo->MCU_membership[blkn]; 497 compptr = cinfo->cur_comp_info[ci]; 498 499 /* Compute the DC value after the required point transform by Al. 500 * This is simply an arithmetic right shift. 501 */ 502 temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al); 503 504 /* DC differences are figured on the point-transformed values. */ 505 temp = temp2 - entropy->last_dc_val[ci]; 506 entropy->last_dc_val[ci] = temp2; 507 508 /* Encode the DC coefficient difference per section G.1.2.1 */ 509 510 /* This is a well-known technique for obtaining the absolute value without 511 * a branch. It is derived from an assembly language technique presented 512 * in "How to Optimize for the Pentium Processors", Copyright (c) 1996, 513 * 1997 by Agner Fog. 514 */ 515 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); 516 temp ^= temp3; 517 temp -= temp3; /* temp is abs value of input */ 518 /* For a negative input, want temp2 = bitwise complement of abs(input) */ 519 temp2 = temp ^ temp3; 520 521 /* Find the number of bits needed for the magnitude of the coefficient */ 522 nbits = JPEG_NBITS(temp); 523 /* Check for out-of-range coefficient values. 524 * Since we're encoding a difference, the range limit is twice as much. 525 */ 526 if (nbits > MAX_COEF_BITS + 1) 527 ERREXIT(cinfo, JERR_BAD_DCT_COEF); 528 529 /* Count/emit the Huffman-coded symbol for the number of bits */ 530 emit_symbol(entropy, compptr->dc_tbl_no, nbits); 531 532 /* Emit that number of bits of the value, if positive, */ 533 /* or the complement of its magnitude, if negative. */ 534 if (nbits) /* emit_bits rejects calls with size 0 */ 535 emit_bits(entropy, (unsigned int)temp2, nbits); 536 } 537 538 cinfo->dest->next_output_byte = entropy->next_output_byte; 539 cinfo->dest->free_in_buffer = entropy->free_in_buffer; 540 541 /* Update restart-interval state too */ 542 if (cinfo->restart_interval) { 543 if (entropy->restarts_to_go == 0) { 544 entropy->restarts_to_go = cinfo->restart_interval; 545 entropy->next_restart_num++; 546 entropy->next_restart_num &= 7; 547 } 548 entropy->restarts_to_go--; 549 } 550 551 return TRUE; 552 } 553 554 555 /* 556 * Data preparation for encode_mcu_AC_first(). 557 */ 558 559 #define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \ 560 for (k = 0; k < Sl; k++) { \ 561 temp = block[jpeg_natural_order_start[k]]; \ 562 if (temp == 0) \ 563 continue; \ 564 /* We must apply the point transform by Al. For AC coefficients this \ 565 * is an integer division with rounding towards 0. To do this portably \ 566 * in C, we shift after obtaining the absolute value; so the code is \ 567 * interwoven with finding the abs value (temp) and output bits (temp2). \ 568 */ \ 569 temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ 570 temp ^= temp2; \ 571 temp -= temp2; /* temp is abs value of input */ \ 572 temp >>= Al; /* apply the point transform */ \ 573 /* Watch out for case that nonzero coef is zero after point transform */ \ 574 if (temp == 0) \ 575 continue; \ 576 /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ 577 temp2 ^= temp; \ 578 values[k] = temp; \ 579 values[k + DCTSIZE2] = temp2; \ 580 zerobits |= ((size_t)1U) << k; \ 581 } \ 582 } 583 584 METHODDEF(void) 585 encode_mcu_AC_first_prepare(const JCOEF *block, 586 const int *jpeg_natural_order_start, int Sl, 587 int Al, JCOEF *values, size_t *bits) 588 { 589 register int k, temp, temp2; 590 size_t zerobits = 0U; 591 int Sl0 = Sl; 592 593 #if SIZEOF_SIZE_T == 4 594 if (Sl0 > 32) 595 Sl0 = 32; 596 #endif 597 598 COMPUTE_ABSVALUES_AC_FIRST(Sl0); 599 600 bits[0] = zerobits; 601 #if SIZEOF_SIZE_T == 4 602 zerobits = 0U; 603 604 if (Sl > 32) { 605 Sl -= 32; 606 jpeg_natural_order_start += 32; 607 values += 32; 608 609 COMPUTE_ABSVALUES_AC_FIRST(Sl); 610 } 611 bits[1] = zerobits; 612 #endif 613 } 614 615 /* 616 * MCU encoding for AC initial scan (either spectral selection, 617 * or first pass of successive approximation). 618 */ 619 620 #define ENCODE_COEFS_AC_FIRST(label) { \ 621 while (zerobits) { \ 622 r = count_zeroes(&zerobits); \ 623 cvalue += r; \ 624 label \ 625 temp = cvalue[0]; \ 626 temp2 = cvalue[DCTSIZE2]; \ 627 \ 628 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ 629 while (r > 15) { \ 630 emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ 631 r -= 16; \ 632 } \ 633 \ 634 /* Find the number of bits needed for the magnitude of the coefficient */ \ 635 nbits = JPEG_NBITS_NONZERO(temp); /* there must be at least one 1 bit */ \ 636 /* Check for out-of-range coefficient values */ \ 637 if (nbits > MAX_COEF_BITS) \ 638 ERREXIT(cinfo, JERR_BAD_DCT_COEF); \ 639 \ 640 /* Count/emit Huffman symbol for run length / number of bits */ \ 641 emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \ 642 \ 643 /* Emit that number of bits of the value, if positive, */ \ 644 /* or the complement of its magnitude, if negative. */ \ 645 emit_bits(entropy, (unsigned int)temp2, nbits); \ 646 \ 647 cvalue++; \ 648 zerobits >>= 1; \ 649 } \ 650 } 651 652 METHODDEF(boolean) 653 encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) 654 { 655 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 656 register int temp, temp2; 657 register int nbits, r; 658 int Sl = cinfo->Se - cinfo->Ss + 1; 659 int Al = cinfo->Al; 660 JCOEF values_unaligned[2 * DCTSIZE2 + 15]; 661 JCOEF *values; 662 const JCOEF *cvalue; 663 size_t zerobits; 664 size_t bits[8 / SIZEOF_SIZE_T]; 665 666 entropy->next_output_byte = cinfo->dest->next_output_byte; 667 entropy->free_in_buffer = cinfo->dest->free_in_buffer; 668 669 /* Emit restart marker if needed */ 670 if (cinfo->restart_interval) 671 if (entropy->restarts_to_go == 0) 672 emit_restart(entropy, entropy->next_restart_num); 673 674 #ifdef WITH_SIMD 675 cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16); 676 #else 677 /* Not using SIMD, so alignment is not needed */ 678 cvalue = values = values_unaligned; 679 #endif 680 681 /* Prepare data */ 682 entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, 683 Sl, Al, values, bits); 684 685 zerobits = bits[0]; 686 #if SIZEOF_SIZE_T == 4 687 zerobits |= bits[1]; 688 #endif 689 690 /* Emit any pending EOBRUN */ 691 if (zerobits && (entropy->EOBRUN > 0)) 692 emit_eobrun(entropy); 693 694 #if SIZEOF_SIZE_T == 4 695 zerobits = bits[0]; 696 #endif 697 698 /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ 699 700 ENCODE_COEFS_AC_FIRST((void)0;); 701 702 #if SIZEOF_SIZE_T == 4 703 zerobits = bits[1]; 704 if (zerobits) { 705 int diff = ((values + DCTSIZE2 / 2) - cvalue); 706 r = count_zeroes(&zerobits); 707 r += diff; 708 cvalue += r; 709 goto first_iter_ac_first; 710 } 711 712 ENCODE_COEFS_AC_FIRST(first_iter_ac_first:); 713 #endif 714 715 if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ 716 entropy->EOBRUN++; /* count an EOB */ 717 if (entropy->EOBRUN == 0x7FFF) 718 emit_eobrun(entropy); /* force it out to avoid overflow */ 719 } 720 721 cinfo->dest->next_output_byte = entropy->next_output_byte; 722 cinfo->dest->free_in_buffer = entropy->free_in_buffer; 723 724 /* Update restart-interval state too */ 725 if (cinfo->restart_interval) { 726 if (entropy->restarts_to_go == 0) { 727 entropy->restarts_to_go = cinfo->restart_interval; 728 entropy->next_restart_num++; 729 entropy->next_restart_num &= 7; 730 } 731 entropy->restarts_to_go--; 732 } 733 734 return TRUE; 735 } 736 737 738 /* 739 * MCU encoding for DC successive approximation refinement scan. 740 * Note: we assume such scans can be multi-component, although the spec 741 * is not very clear on the point. 742 */ 743 744 METHODDEF(boolean) 745 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) 746 { 747 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 748 register int temp; 749 int blkn; 750 int Al = cinfo->Al; 751 JBLOCKROW block; 752 753 entropy->next_output_byte = cinfo->dest->next_output_byte; 754 entropy->free_in_buffer = cinfo->dest->free_in_buffer; 755 756 /* Emit restart marker if needed */ 757 if (cinfo->restart_interval) 758 if (entropy->restarts_to_go == 0) 759 emit_restart(entropy, entropy->next_restart_num); 760 761 /* Encode the MCU data blocks */ 762 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { 763 block = MCU_data[blkn]; 764 765 /* We simply emit the Al'th bit of the DC coefficient value. */ 766 temp = (*block)[0]; 767 emit_bits(entropy, (unsigned int)(temp >> Al), 1); 768 } 769 770 cinfo->dest->next_output_byte = entropy->next_output_byte; 771 cinfo->dest->free_in_buffer = entropy->free_in_buffer; 772 773 /* Update restart-interval state too */ 774 if (cinfo->restart_interval) { 775 if (entropy->restarts_to_go == 0) { 776 entropy->restarts_to_go = cinfo->restart_interval; 777 entropy->next_restart_num++; 778 entropy->next_restart_num &= 7; 779 } 780 entropy->restarts_to_go--; 781 } 782 783 return TRUE; 784 } 785 786 787 /* 788 * Data preparation for encode_mcu_AC_refine(). 789 */ 790 791 #define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \ 792 /* It is convenient to make a pre-pass to determine the transformed \ 793 * coefficients' absolute values and the EOB position. \ 794 */ \ 795 for (k = 0; k < Sl; k++) { \ 796 temp = block[jpeg_natural_order_start[k]]; \ 797 /* We must apply the point transform by Al. For AC coefficients this \ 798 * is an integer division with rounding towards 0. To do this portably \ 799 * in C, we shift after obtaining the absolute value. \ 800 */ \ 801 temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ 802 temp ^= temp2; \ 803 temp -= temp2; /* temp is abs value of input */ \ 804 temp >>= Al; /* apply the point transform */ \ 805 if (temp != 0) { \ 806 zerobits |= ((size_t)1U) << k; \ 807 signbits |= ((size_t)(temp2 + 1)) << k; \ 808 } \ 809 absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ 810 if (temp == 1) \ 811 EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ 812 } \ 813 } 814 815 METHODDEF(int) 816 encode_mcu_AC_refine_prepare(const JCOEF *block, 817 const int *jpeg_natural_order_start, int Sl, 818 int Al, JCOEF *absvalues, size_t *bits) 819 { 820 register int k, temp, temp2; 821 int EOB = 0; 822 size_t zerobits = 0U, signbits = 0U; 823 int Sl0 = Sl; 824 825 #if SIZEOF_SIZE_T == 4 826 if (Sl0 > 32) 827 Sl0 = 32; 828 #endif 829 830 COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0); 831 832 bits[0] = zerobits; 833 #if SIZEOF_SIZE_T == 8 834 bits[1] = signbits; 835 #else 836 bits[2] = signbits; 837 838 zerobits = 0U; 839 signbits = 0U; 840 841 if (Sl > 32) { 842 Sl -= 32; 843 jpeg_natural_order_start += 32; 844 absvalues += 32; 845 846 COMPUTE_ABSVALUES_AC_REFINE(Sl, 32); 847 } 848 849 bits[1] = zerobits; 850 bits[3] = signbits; 851 #endif 852 853 return EOB; 854 } 855 856 857 /* 858 * MCU encoding for AC successive approximation refinement scan. 859 */ 860 861 #define ENCODE_COEFS_AC_REFINE(label) { \ 862 while (zerobits) { \ 863 int idx = count_zeroes(&zerobits); \ 864 r += idx; \ 865 cabsvalue += idx; \ 866 signbits >>= idx; \ 867 label \ 868 /* Emit any required ZRLs, but not if they can be folded into EOB */ \ 869 while (r > 15 && (cabsvalue <= EOBPTR)) { \ 870 /* emit any pending EOBRUN and the BE correction bits */ \ 871 emit_eobrun(entropy); \ 872 /* Emit ZRL */ \ 873 emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \ 874 r -= 16; \ 875 /* Emit buffered correction bits that must be associated with ZRL */ \ 876 emit_buffered_bits(entropy, BR_buffer, BR); \ 877 BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ 878 BR = 0; \ 879 } \ 880 \ 881 temp = *cabsvalue++; \ 882 \ 883 /* If the coef was previously nonzero, it only needs a correction bit. \ 884 * NOTE: a straight translation of the spec's figure G.7 would suggest \ 885 * that we also need to test r > 15. But if r > 15, we can only get here \ 886 * if k > EOB, which implies that this coefficient is not 1. \ 887 */ \ 888 if (temp > 1) { \ 889 /* The correction bit is the next bit of the absolute value. */ \ 890 BR_buffer[BR++] = (char)(temp & 1); \ 891 signbits >>= 1; \ 892 zerobits >>= 1; \ 893 continue; \ 894 } \ 895 \ 896 /* Emit any pending EOBRUN and the BE correction bits */ \ 897 emit_eobrun(entropy); \ 898 \ 899 /* Count/emit Huffman symbol for run length / number of bits */ \ 900 emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \ 901 \ 902 /* Emit output bit for newly-nonzero coef */ \ 903 temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \ 904 emit_bits(entropy, (unsigned int)temp, 1); \ 905 \ 906 /* Emit buffered correction bits that must be associated with this code */ \ 907 emit_buffered_bits(entropy, BR_buffer, BR); \ 908 BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \ 909 BR = 0; \ 910 r = 0; /* reset zero run length */ \ 911 signbits >>= 1; \ 912 zerobits >>= 1; \ 913 } \ 914 } 915 916 METHODDEF(boolean) 917 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) 918 { 919 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 920 register int temp, r; 921 char *BR_buffer; 922 unsigned int BR; 923 int Sl = cinfo->Se - cinfo->Ss + 1; 924 int Al = cinfo->Al; 925 JCOEF absvalues_unaligned[DCTSIZE2 + 15]; 926 JCOEF *absvalues; 927 const JCOEF *cabsvalue, *EOBPTR; 928 size_t zerobits, signbits; 929 size_t bits[16 / SIZEOF_SIZE_T]; 930 931 entropy->next_output_byte = cinfo->dest->next_output_byte; 932 entropy->free_in_buffer = cinfo->dest->free_in_buffer; 933 934 /* Emit restart marker if needed */ 935 if (cinfo->restart_interval) 936 if (entropy->restarts_to_go == 0) 937 emit_restart(entropy, entropy->next_restart_num); 938 939 #ifdef WITH_SIMD 940 cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16); 941 #else 942 /* Not using SIMD, so alignment is not needed */ 943 cabsvalue = absvalues = absvalues_unaligned; 944 #endif 945 946 /* Prepare data */ 947 EOBPTR = absvalues + 948 entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, 949 Sl, Al, absvalues, bits); 950 951 /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */ 952 953 r = 0; /* r = run length of zeros */ 954 BR = 0; /* BR = count of buffered bits added now */ 955 BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */ 956 957 zerobits = bits[0]; 958 #if SIZEOF_SIZE_T == 8 959 signbits = bits[1]; 960 #else 961 signbits = bits[2]; 962 #endif 963 ENCODE_COEFS_AC_REFINE((void)0;); 964 965 #if SIZEOF_SIZE_T == 4 966 zerobits = bits[1]; 967 signbits = bits[3]; 968 969 if (zerobits) { 970 int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue); 971 int idx = count_zeroes(&zerobits); 972 signbits >>= idx; 973 idx += diff; 974 r += idx; 975 cabsvalue += idx; 976 goto first_iter_ac_refine; 977 } 978 979 ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:); 980 #endif 981 982 r |= (int)((absvalues + Sl) - cabsvalue); 983 984 if (r > 0 || BR > 0) { /* If there are trailing zeroes, */ 985 entropy->EOBRUN++; /* count an EOB */ 986 entropy->BE += BR; /* concat my correction bits to older ones */ 987 /* We force out the EOB if we risk either: 988 * 1. overflow of the EOB counter; 989 * 2. overflow of the correction bit buffer during the next MCU. 990 */ 991 if (entropy->EOBRUN == 0x7FFF || 992 entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1)) 993 emit_eobrun(entropy); 994 } 995 996 cinfo->dest->next_output_byte = entropy->next_output_byte; 997 cinfo->dest->free_in_buffer = entropy->free_in_buffer; 998 999 /* Update restart-interval state too */ 1000 if (cinfo->restart_interval) { 1001 if (entropy->restarts_to_go == 0) { 1002 entropy->restarts_to_go = cinfo->restart_interval; 1003 entropy->next_restart_num++; 1004 entropy->next_restart_num &= 7; 1005 } 1006 entropy->restarts_to_go--; 1007 } 1008 1009 return TRUE; 1010 } 1011 1012 1013 /* 1014 * Finish up at the end of a Huffman-compressed progressive scan. 1015 */ 1016 1017 METHODDEF(void) 1018 finish_pass_phuff(j_compress_ptr cinfo) 1019 { 1020 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 1021 1022 entropy->next_output_byte = cinfo->dest->next_output_byte; 1023 entropy->free_in_buffer = cinfo->dest->free_in_buffer; 1024 1025 /* Flush out any buffered data */ 1026 emit_eobrun(entropy); 1027 flush_bits(entropy); 1028 1029 cinfo->dest->next_output_byte = entropy->next_output_byte; 1030 cinfo->dest->free_in_buffer = entropy->free_in_buffer; 1031 } 1032 1033 1034 /* 1035 * Finish up a statistics-gathering pass and create the new Huffman tables. 1036 */ 1037 1038 METHODDEF(void) 1039 finish_pass_gather_phuff(j_compress_ptr cinfo) 1040 { 1041 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; 1042 boolean is_DC_band; 1043 int ci, tbl; 1044 jpeg_component_info *compptr; 1045 JHUFF_TBL **htblptr; 1046 boolean did[NUM_HUFF_TBLS]; 1047 1048 /* Flush out buffered data (all we care about is counting the EOB symbol) */ 1049 emit_eobrun(entropy); 1050 1051 is_DC_band = (cinfo->Ss == 0); 1052 1053 /* It's important not to apply jpeg_gen_optimal_table more than once 1054 * per table, because it clobbers the input frequency counts! 1055 */ 1056 MEMZERO(did, sizeof(did)); 1057 1058 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { 1059 compptr = cinfo->cur_comp_info[ci]; 1060 if (is_DC_band) { 1061 if (cinfo->Ah != 0) /* DC refinement needs no table */ 1062 continue; 1063 tbl = compptr->dc_tbl_no; 1064 } else { 1065 tbl = compptr->ac_tbl_no; 1066 } 1067 if (!did[tbl]) { 1068 if (is_DC_band) 1069 htblptr = &cinfo->dc_huff_tbl_ptrs[tbl]; 1070 else 1071 htblptr = &cinfo->ac_huff_tbl_ptrs[tbl]; 1072 if (*htblptr == NULL) 1073 *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); 1074 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); 1075 did[tbl] = TRUE; 1076 } 1077 } 1078 } 1079 1080 1081 /* 1082 * Module initialization routine for progressive Huffman entropy encoding. 1083 */ 1084 1085 GLOBAL(void) 1086 jinit_phuff_encoder(j_compress_ptr cinfo) 1087 { 1088 phuff_entropy_ptr entropy; 1089 int i; 1090 1091 entropy = (phuff_entropy_ptr) 1092 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, 1093 sizeof(phuff_entropy_encoder)); 1094 cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; 1095 entropy->pub.start_pass = start_pass_phuff; 1096 1097 /* Mark tables unallocated */ 1098 for (i = 0; i < NUM_HUFF_TBLS; i++) { 1099 entropy->derived_tbls[i] = NULL; 1100 entropy->count_ptrs[i] = NULL; 1101 } 1102 entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ 1103 } 1104 1105 #endif /* C_PROGRESSIVE_SUPPORTED */ 1106