1 /* 2 * jchuff.c 3 * 4 * This file was part of the Independent JPEG Group's software: 5 * Copyright (C) 1991-1997, Thomas G. Lane. 6 * libjpeg-turbo Modifications: 7 * Copyright (C) 2009-2011, D. R. Commander. 8 * For conditions of distribution and use, see the accompanying README file. 9 * 10 * This file contains Huffman entropy encoding routines. 11 * 12 * Much of the complexity here has to do with supporting output suspension. 13 * If the data destination module demands suspension, we want to be able to 14 * back up to the start of the current MCU. To do this, we copy state 15 * variables into local working storage, and update them back to the 16 * permanent JPEG objects only upon successful completion of an MCU. 17 */ 18 19 #define JPEG_INTERNALS 20 #include "jinclude.h" 21 #include "jpeglib.h" 22 #include "jchuff.h" /* Declarations shared with jcphuff.c */ 23 #include <limits.h> 24 25 /* 26 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be 27 * used for bit counting rather than the lookup table. This will reduce the 28 * memory footprint by 64k, which is important for some mobile applications 29 * that create many isolated instances of libjpeg-turbo (web browsers, for 30 * instance.) This may improve performance on some mobile platforms as well. 31 * This feature is enabled by default only on ARM processors, because some x86 32 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be 33 * shown to have a significant performance impact even on the x86 chips that 34 * have a fast implementation of it. When building for ARMv6, you can 35 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler 36 * flags (this defines __thumb__). 37 */ 38 39 /* NOTE: Both GCC and Clang define __GNUC__ */ 40 #if defined __GNUC__ && defined __arm__ 41 #if !defined __thumb__ || defined __thumb2__ 42 #define USE_CLZ_INTRINSIC 43 #endif 44 #endif 45 46 #ifdef USE_CLZ_INTRINSIC 47 #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) 48 #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) 49 #else 50 static unsigned char jpeg_nbits_table[65536]; 51 static int jpeg_nbits_table_init = 0; 52 #define JPEG_NBITS(x) (jpeg_nbits_table[x]) 53 #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) 54 #endif 55 56 #ifndef min 57 #define min(a,b) ((a)<(b)?(a):(b)) 58 #endif 59 60 61 /* Expanded entropy encoder object for Huffman encoding. 62 * 63 * The savable_state subrecord contains fields that change within an MCU, 64 * but must not be updated permanently until we complete the MCU. 65 */ 66 67 typedef struct { 68 size_t put_buffer; /* current bit-accumulation buffer */ 69 int put_bits; /* # of bits now in it */ 70 int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ 71 } savable_state; 72 73 /* This macro is to work around compilers with missing or broken 74 * structure assignment. You'll need to fix this code if you have 75 * such a compiler and you change MAX_COMPS_IN_SCAN. 76 */ 77 78 #ifndef NO_STRUCT_ASSIGN 79 #define ASSIGN_STATE(dest,src) ((dest) = (src)) 80 #else 81 #if MAX_COMPS_IN_SCAN == 4 82 #define ASSIGN_STATE(dest,src) \ 83 ((dest).put_buffer = (src).put_buffer, \ 84 (dest).put_bits = (src).put_bits, \ 85 (dest).last_dc_val[0] = (src).last_dc_val[0], \ 86 (dest).last_dc_val[1] = (src).last_dc_val[1], \ 87 (dest).last_dc_val[2] = (src).last_dc_val[2], \ 88 (dest).last_dc_val[3] = (src).last_dc_val[3]) 89 #endif 90 #endif 91 92 93 typedef struct { 94 struct jpeg_entropy_encoder pub; /* public fields */ 95 96 savable_state saved; /* Bit buffer & DC state at start of MCU */ 97 98 /* These fields are NOT loaded into local working state. */ 99 unsigned int restarts_to_go; /* MCUs left in this restart interval */ 100 int next_restart_num; /* next restart number to write (0-7) */ 101 102 /* Pointers to derived tables (these workspaces have image lifespan) */ 103 c_derived_tbl * dc_derived_tbls[NUM_HUFF_TBLS]; 104 c_derived_tbl * ac_derived_tbls[NUM_HUFF_TBLS]; 105 106 #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ 107 long * dc_count_ptrs[NUM_HUFF_TBLS]; 108 long * ac_count_ptrs[NUM_HUFF_TBLS]; 109 #endif 110 } huff_entropy_encoder; 111 112 typedef huff_entropy_encoder * huff_entropy_ptr; 113 114 /* Working state while writing an MCU. 115 * This struct contains all the fields that are needed by subroutines. 116 */ 117 118 typedef struct { 119 JOCTET * next_output_byte; /* => next byte to write in buffer */ 120 size_t free_in_buffer; /* # of byte spaces remaining in buffer */ 121 savable_state cur; /* Current bit buffer & DC state */ 122 j_compress_ptr cinfo; /* dump_buffer needs access to this */ 123 } working_state; 124 125 126 /* Forward declarations */ 127 METHODDEF(boolean) encode_mcu_huff JPP((j_compress_ptr cinfo, 128 JBLOCKROW *MCU_data)); 129 METHODDEF(void) finish_pass_huff JPP((j_compress_ptr cinfo)); 130 #ifdef ENTROPY_OPT_SUPPORTED 131 METHODDEF(boolean) encode_mcu_gather JPP((j_compress_ptr cinfo, 132 JBLOCKROW *MCU_data)); 133 METHODDEF(void) finish_pass_gather JPP((j_compress_ptr cinfo)); 134 #endif 135 136 137 /* 138 * Initialize for a Huffman-compressed scan. 139 * If gather_statistics is TRUE, we do not output anything during the scan, 140 * just count the Huffman symbols used and generate Huffman code tables. 141 */ 142 143 METHODDEF(void) 144 start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) 145 { 146 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; 147 int ci, dctbl, actbl; 148 jpeg_component_info * compptr; 149 150 if (gather_statistics) { 151 #ifdef ENTROPY_OPT_SUPPORTED 152 entropy->pub.encode_mcu = encode_mcu_gather; 153 entropy->pub.finish_pass = finish_pass_gather; 154 #else 155 ERREXIT(cinfo, JERR_NOT_COMPILED); 156 #endif 157 } else { 158 entropy->pub.encode_mcu = encode_mcu_huff; 159 entropy->pub.finish_pass = finish_pass_huff; 160 } 161 162 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { 163 compptr = cinfo->cur_comp_info[ci]; 164 dctbl = compptr->dc_tbl_no; 165 actbl = compptr->ac_tbl_no; 166 if (gather_statistics) { 167 #ifdef ENTROPY_OPT_SUPPORTED 168 /* Check for invalid table indexes */ 169 /* (make_c_derived_tbl does this in the other path) */ 170 if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) 171 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); 172 if (actbl < 0 || actbl >= NUM_HUFF_TBLS) 173 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); 174 /* Allocate and zero the statistics tables */ 175 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ 176 if (entropy->dc_count_ptrs[dctbl] == NULL) 177 entropy->dc_count_ptrs[dctbl] = (long *) 178 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 179 257 * SIZEOF(long)); 180 MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * SIZEOF(long)); 181 if (entropy->ac_count_ptrs[actbl] == NULL) 182 entropy->ac_count_ptrs[actbl] = (long *) 183 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 184 257 * SIZEOF(long)); 185 MEMZERO(entropy->ac_count_ptrs[actbl], 257 * SIZEOF(long)); 186 #endif 187 } else { 188 /* Compute derived values for Huffman tables */ 189 /* We may do this more than once for a table, but it's not expensive */ 190 jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, 191 & entropy->dc_derived_tbls[dctbl]); 192 jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, 193 & entropy->ac_derived_tbls[actbl]); 194 } 195 /* Initialize DC predictions to 0 */ 196 entropy->saved.last_dc_val[ci] = 0; 197 } 198 199 /* Initialize bit buffer to empty */ 200 entropy->saved.put_buffer = 0; 201 entropy->saved.put_bits = 0; 202 203 /* Initialize restart stuff */ 204 entropy->restarts_to_go = cinfo->restart_interval; 205 entropy->next_restart_num = 0; 206 } 207 208 209 /* 210 * Compute the derived values for a Huffman table. 211 * This routine also performs some validation checks on the table. 212 * 213 * Note this is also used by jcphuff.c. 214 */ 215 216 GLOBAL(void) 217 jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, 218 c_derived_tbl ** pdtbl) 219 { 220 JHUFF_TBL *htbl; 221 c_derived_tbl *dtbl; 222 int p, i, l, lastp, si, maxsymbol; 223 char huffsize[257]; 224 unsigned int huffcode[257]; 225 unsigned int code; 226 227 /* Note that huffsize[] and huffcode[] are filled in code-length order, 228 * paralleling the order of the symbols themselves in htbl->huffval[]. 229 */ 230 231 /* Find the input Huffman table */ 232 if (tblno < 0 || tblno >= NUM_HUFF_TBLS) 233 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); 234 htbl = 235 isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; 236 if (htbl == NULL) 237 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); 238 239 /* Allocate a workspace if we haven't already done so. */ 240 if (*pdtbl == NULL) 241 *pdtbl = (c_derived_tbl *) 242 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 243 SIZEOF(c_derived_tbl)); 244 dtbl = *pdtbl; 245 246 /* Figure C.1: make table of Huffman code length for each symbol */ 247 248 p = 0; 249 for (l = 1; l <= 16; l++) { 250 i = (int) htbl->bits[l]; 251 if (i < 0 || p + i > 256) /* protect against table overrun */ 252 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); 253 while (i--) 254 huffsize[p++] = (char) l; 255 } 256 huffsize[p] = 0; 257 lastp = p; 258 259 /* Figure C.2: generate the codes themselves */ 260 /* We also validate that the counts represent a legal Huffman code tree. */ 261 262 code = 0; 263 si = huffsize[0]; 264 p = 0; 265 while (huffsize[p]) { 266 while (((int) huffsize[p]) == si) { 267 huffcode[p++] = code; 268 code++; 269 } 270 /* code is now 1 more than the last code used for codelength si; but 271 * it must still fit in si bits, since no code is allowed to be all ones. 272 */ 273 if (((INT32) code) >= (((INT32) 1) << si)) 274 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); 275 code <<= 1; 276 si++; 277 } 278 279 /* Figure C.3: generate encoding tables */ 280 /* These are code and size indexed by symbol value */ 281 282 /* Set all codeless symbols to have code length 0; 283 * this lets us detect duplicate VAL entries here, and later 284 * allows emit_bits to detect any attempt to emit such symbols. 285 */ 286 MEMZERO(dtbl->ehufsi, SIZEOF(dtbl->ehufsi)); 287 288 /* This is also a convenient place to check for out-of-range 289 * and duplicated VAL entries. We allow 0..255 for AC symbols 290 * but only 0..15 for DC. (We could constrain them further 291 * based on data depth and mode, but this seems enough.) 292 */ 293 maxsymbol = isDC ? 15 : 255; 294 295 for (p = 0; p < lastp; p++) { 296 i = htbl->huffval[p]; 297 if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) 298 ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); 299 dtbl->ehufco[i] = huffcode[p]; 300 dtbl->ehufsi[i] = huffsize[p]; 301 } 302 303 #ifndef USE_CLZ_INTRINSIC 304 if(!jpeg_nbits_table_init) { 305 for(i = 0; i < 65536; i++) { 306 int nbits = 0, temp = i; 307 while (temp) {temp >>= 1; nbits++;} 308 jpeg_nbits_table[i] = nbits; 309 } 310 jpeg_nbits_table_init = 1; 311 } 312 #endif 313 } 314 315 316 /* Outputting bytes to the file */ 317 318 /* Emit a byte, taking 'action' if must suspend. */ 319 #define emit_byte(state,val,action) \ 320 { *(state)->next_output_byte++ = (JOCTET) (val); \ 321 if (--(state)->free_in_buffer == 0) \ 322 if (! dump_buffer(state)) \ 323 { action; } } 324 325 326 LOCAL(boolean) 327 dump_buffer (working_state * state) 328 /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ 329 { 330 struct jpeg_destination_mgr * dest = state->cinfo->dest; 331 332 if (! (*dest->empty_output_buffer) (state->cinfo)) 333 return FALSE; 334 /* After a successful buffer dump, must reset buffer pointers */ 335 state->next_output_byte = dest->next_output_byte; 336 state->free_in_buffer = dest->free_in_buffer; 337 return TRUE; 338 } 339 340 341 /* Outputting bits to the file */ 342 343 /* These macros perform the same task as the emit_bits() function in the 344 * original libjpeg code. In addition to reducing overhead by explicitly 345 * inlining the code, additional performance is achieved by taking into 346 * account the size of the bit buffer and waiting until it is almost full 347 * before emptying it. This mostly benefits 64-bit platforms, since 6 348 * bytes can be stored in a 64-bit bit buffer before it has to be emptied. 349 */ 350 351 #define EMIT_BYTE() { \ 352 JOCTET c; \ 353 put_bits -= 8; \ 354 c = (JOCTET)GETJOCTET(put_buffer >> put_bits); \ 355 *buffer++ = c; \ 356 if (c == 0xFF) /* need to stuff a zero byte? */ \ 357 *buffer++ = 0; \ 358 } 359 360 #define PUT_BITS(code, size) { \ 361 put_bits += size; \ 362 put_buffer = (put_buffer << size) | code; \ 363 } 364 365 #define CHECKBUF15() { \ 366 if (put_bits > 15) { \ 367 EMIT_BYTE() \ 368 EMIT_BYTE() \ 369 } \ 370 } 371 372 #define CHECKBUF31() { \ 373 if (put_bits > 31) { \ 374 EMIT_BYTE() \ 375 EMIT_BYTE() \ 376 EMIT_BYTE() \ 377 EMIT_BYTE() \ 378 } \ 379 } 380 381 #define CHECKBUF47() { \ 382 if (put_bits > 47) { \ 383 EMIT_BYTE() \ 384 EMIT_BYTE() \ 385 EMIT_BYTE() \ 386 EMIT_BYTE() \ 387 EMIT_BYTE() \ 388 EMIT_BYTE() \ 389 } \ 390 } 391 392 #if __WORDSIZE==64 || defined(_WIN64) 393 394 #define EMIT_BITS(code, size) { \ 395 CHECKBUF47() \ 396 PUT_BITS(code, size) \ 397 } 398 399 #define EMIT_CODE(code, size) { \ 400 temp2 &= (((INT32) 1)<<nbits) - 1; \ 401 CHECKBUF31() \ 402 PUT_BITS(code, size) \ 403 PUT_BITS(temp2, nbits) \ 404 } 405 406 #else 407 408 #define EMIT_BITS(code, size) { \ 409 PUT_BITS(code, size) \ 410 CHECKBUF15() \ 411 } 412 413 #define EMIT_CODE(code, size) { \ 414 temp2 &= (((INT32) 1)<<nbits) - 1; \ 415 PUT_BITS(code, size) \ 416 CHECKBUF15() \ 417 PUT_BITS(temp2, nbits) \ 418 CHECKBUF15() \ 419 } 420 421 #endif 422 423 424 #define BUFSIZE (DCTSIZE2 * 2) 425 426 #define LOAD_BUFFER() { \ 427 if (state->free_in_buffer < BUFSIZE) { \ 428 localbuf = 1; \ 429 buffer = _buffer; \ 430 } \ 431 else buffer = state->next_output_byte; \ 432 } 433 434 #define STORE_BUFFER() { \ 435 if (localbuf) { \ 436 bytes = buffer - _buffer; \ 437 buffer = _buffer; \ 438 while (bytes > 0) { \ 439 bytestocopy = min(bytes, state->free_in_buffer); \ 440 MEMCOPY(state->next_output_byte, buffer, bytestocopy); \ 441 state->next_output_byte += bytestocopy; \ 442 buffer += bytestocopy; \ 443 state->free_in_buffer -= bytestocopy; \ 444 if (state->free_in_buffer == 0) \ 445 if (! dump_buffer(state)) return FALSE; \ 446 bytes -= bytestocopy; \ 447 } \ 448 } \ 449 else { \ 450 state->free_in_buffer -= (buffer - state->next_output_byte); \ 451 state->next_output_byte = buffer; \ 452 } \ 453 } 454 455 456 LOCAL(boolean) 457 flush_bits (working_state * state) 458 { 459 JOCTET _buffer[BUFSIZE], *buffer; 460 size_t put_buffer; int put_bits; 461 size_t bytes, bytestocopy; int localbuf = 0; 462 463 put_buffer = state->cur.put_buffer; 464 put_bits = state->cur.put_bits; 465 LOAD_BUFFER() 466 467 /* fill any partial byte with ones */ 468 PUT_BITS(0x7F, 7) 469 while (put_bits >= 8) EMIT_BYTE() 470 471 state->cur.put_buffer = 0; /* and reset bit-buffer to empty */ 472 state->cur.put_bits = 0; 473 STORE_BUFFER() 474 475 return TRUE; 476 } 477 478 479 /* Encode a single block's worth of coefficients */ 480 481 LOCAL(boolean) 482 encode_one_block (working_state * state, JCOEFPTR block, int last_dc_val, 483 c_derived_tbl *dctbl, c_derived_tbl *actbl) 484 { 485 int temp, temp2, temp3; 486 int nbits; 487 int r, code, size; 488 JOCTET _buffer[BUFSIZE], *buffer; 489 size_t put_buffer; int put_bits; 490 int code_0xf0 = actbl->ehufco[0xf0], size_0xf0 = actbl->ehufsi[0xf0]; 491 size_t bytes, bytestocopy; int localbuf = 0; 492 493 put_buffer = state->cur.put_buffer; 494 put_bits = state->cur.put_bits; 495 LOAD_BUFFER() 496 497 /* Encode the DC coefficient difference per section F.1.2.1 */ 498 499 temp = temp2 = block[0] - last_dc_val; 500 501 /* This is a well-known technique for obtaining the absolute value without a 502 * branch. It is derived from an assembly language technique presented in 503 * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by 504 * Agner Fog. 505 */ 506 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); 507 temp ^= temp3; 508 temp -= temp3; 509 510 /* For a negative input, want temp2 = bitwise complement of abs(input) */ 511 /* This code assumes we are on a two's complement machine */ 512 temp2 += temp3; 513 514 /* Find the number of bits needed for the magnitude of the coefficient */ 515 nbits = JPEG_NBITS(temp); 516 517 /* Emit the Huffman-coded symbol for the number of bits */ 518 code = dctbl->ehufco[nbits]; 519 size = dctbl->ehufsi[nbits]; 520 PUT_BITS(code, size) 521 CHECKBUF15() 522 523 /* Mask off any extra bits in code */ 524 temp2 &= (((INT32) 1)<<nbits) - 1; 525 526 /* Emit that number of bits of the value, if positive, */ 527 /* or the complement of its magnitude, if negative. */ 528 PUT_BITS(temp2, nbits) 529 CHECKBUF15() 530 531 /* Encode the AC coefficients per section F.1.2.2 */ 532 533 r = 0; /* r = run length of zeros */ 534 535 /* Manually unroll the k loop to eliminate the counter variable. This 536 * improves performance greatly on systems with a limited number of 537 * registers (such as x86.) 538 */ 539 #define kloop(jpeg_natural_order_of_k) { \ 540 if ((temp = block[jpeg_natural_order_of_k]) == 0) { \ 541 r++; \ 542 } else { \ 543 temp2 = temp; \ 544 /* Branch-less absolute value, bitwise complement, etc., same as above */ \ 545 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); \ 546 temp ^= temp3; \ 547 temp -= temp3; \ 548 temp2 += temp3; \ 549 nbits = JPEG_NBITS_NONZERO(temp); \ 550 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \ 551 while (r > 15) { \ 552 EMIT_BITS(code_0xf0, size_0xf0) \ 553 r -= 16; \ 554 } \ 555 /* Emit Huffman symbol for run length / number of bits */ \ 556 temp3 = (r << 4) + nbits; \ 557 code = actbl->ehufco[temp3]; \ 558 size = actbl->ehufsi[temp3]; \ 559 EMIT_CODE(code, size) \ 560 r = 0; \ 561 } \ 562 } 563 564 /* One iteration for each value in jpeg_natural_order[] */ 565 kloop(1); kloop(8); kloop(16); kloop(9); kloop(2); kloop(3); 566 kloop(10); kloop(17); kloop(24); kloop(32); kloop(25); kloop(18); 567 kloop(11); kloop(4); kloop(5); kloop(12); kloop(19); kloop(26); 568 kloop(33); kloop(40); kloop(48); kloop(41); kloop(34); kloop(27); 569 kloop(20); kloop(13); kloop(6); kloop(7); kloop(14); kloop(21); 570 kloop(28); kloop(35); kloop(42); kloop(49); kloop(56); kloop(57); 571 kloop(50); kloop(43); kloop(36); kloop(29); kloop(22); kloop(15); 572 kloop(23); kloop(30); kloop(37); kloop(44); kloop(51); kloop(58); 573 kloop(59); kloop(52); kloop(45); kloop(38); kloop(31); kloop(39); 574 kloop(46); kloop(53); kloop(60); kloop(61); kloop(54); kloop(47); 575 kloop(55); kloop(62); kloop(63); 576 577 /* If the last coef(s) were zero, emit an end-of-block code */ 578 if (r > 0) { 579 code = actbl->ehufco[0]; 580 size = actbl->ehufsi[0]; 581 EMIT_BITS(code, size) 582 } 583 584 state->cur.put_buffer = put_buffer; 585 state->cur.put_bits = put_bits; 586 STORE_BUFFER() 587 588 return TRUE; 589 } 590 591 592 /* 593 * Emit a restart marker & resynchronize predictions. 594 */ 595 596 LOCAL(boolean) 597 emit_restart (working_state * state, int restart_num) 598 { 599 int ci; 600 601 if (! flush_bits(state)) 602 return FALSE; 603 604 emit_byte(state, 0xFF, return FALSE); 605 emit_byte(state, JPEG_RST0 + restart_num, return FALSE); 606 607 /* Re-initialize DC predictions to 0 */ 608 for (ci = 0; ci < state->cinfo->comps_in_scan; ci++) 609 state->cur.last_dc_val[ci] = 0; 610 611 /* The restart counter is not updated until we successfully write the MCU. */ 612 613 return TRUE; 614 } 615 616 617 /* 618 * Encode and output one MCU's worth of Huffman-compressed coefficients. 619 */ 620 621 METHODDEF(boolean) 622 encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data) 623 { 624 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; 625 working_state state; 626 int blkn, ci; 627 jpeg_component_info * compptr; 628 629 /* Load up working state */ 630 state.next_output_byte = cinfo->dest->next_output_byte; 631 state.free_in_buffer = cinfo->dest->free_in_buffer; 632 ASSIGN_STATE(state.cur, entropy->saved); 633 state.cinfo = cinfo; 634 635 /* Emit restart marker if needed */ 636 if (cinfo->restart_interval) { 637 if (entropy->restarts_to_go == 0) 638 if (! emit_restart(&state, entropy->next_restart_num)) 639 return FALSE; 640 } 641 642 /* Encode the MCU data blocks */ 643 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { 644 ci = cinfo->MCU_membership[blkn]; 645 compptr = cinfo->cur_comp_info[ci]; 646 if (! encode_one_block(&state, 647 MCU_data[blkn][0], state.cur.last_dc_val[ci], 648 entropy->dc_derived_tbls[compptr->dc_tbl_no], 649 entropy->ac_derived_tbls[compptr->ac_tbl_no])) 650 return FALSE; 651 /* Update last_dc_val */ 652 state.cur.last_dc_val[ci] = MCU_data[blkn][0][0]; 653 } 654 655 /* Completed MCU, so update state */ 656 cinfo->dest->next_output_byte = state.next_output_byte; 657 cinfo->dest->free_in_buffer = state.free_in_buffer; 658 ASSIGN_STATE(entropy->saved, state.cur); 659 660 /* Update restart-interval state too */ 661 if (cinfo->restart_interval) { 662 if (entropy->restarts_to_go == 0) { 663 entropy->restarts_to_go = cinfo->restart_interval; 664 entropy->next_restart_num++; 665 entropy->next_restart_num &= 7; 666 } 667 entropy->restarts_to_go--; 668 } 669 670 return TRUE; 671 } 672 673 674 /* 675 * Finish up at the end of a Huffman-compressed scan. 676 */ 677 678 METHODDEF(void) 679 finish_pass_huff (j_compress_ptr cinfo) 680 { 681 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; 682 working_state state; 683 684 /* Load up working state ... flush_bits needs it */ 685 state.next_output_byte = cinfo->dest->next_output_byte; 686 state.free_in_buffer = cinfo->dest->free_in_buffer; 687 ASSIGN_STATE(state.cur, entropy->saved); 688 state.cinfo = cinfo; 689 690 /* Flush out the last data */ 691 if (! flush_bits(&state)) 692 ERREXIT(cinfo, JERR_CANT_SUSPEND); 693 694 /* Update state */ 695 cinfo->dest->next_output_byte = state.next_output_byte; 696 cinfo->dest->free_in_buffer = state.free_in_buffer; 697 ASSIGN_STATE(entropy->saved, state.cur); 698 } 699 700 701 /* 702 * Huffman coding optimization. 703 * 704 * We first scan the supplied data and count the number of uses of each symbol 705 * that is to be Huffman-coded. (This process MUST agree with the code above.) 706 * Then we build a Huffman coding tree for the observed counts. 707 * Symbols which are not needed at all for the particular image are not 708 * assigned any code, which saves space in the DHT marker as well as in 709 * the compressed data. 710 */ 711 712 #ifdef ENTROPY_OPT_SUPPORTED 713 714 715 /* Process a single block's worth of coefficients */ 716 717 LOCAL(void) 718 htest_one_block (j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val, 719 long dc_counts[], long ac_counts[]) 720 { 721 register int temp; 722 register int nbits; 723 register int k, r; 724 725 /* Encode the DC coefficient difference per section F.1.2.1 */ 726 727 temp = block[0] - last_dc_val; 728 if (temp < 0) 729 temp = -temp; 730 731 /* Find the number of bits needed for the magnitude of the coefficient */ 732 nbits = 0; 733 while (temp) { 734 nbits++; 735 temp >>= 1; 736 } 737 /* Check for out-of-range coefficient values. 738 * Since we're encoding a difference, the range limit is twice as much. 739 */ 740 if (nbits > MAX_COEF_BITS+1) 741 ERREXIT(cinfo, JERR_BAD_DCT_COEF); 742 743 /* Count the Huffman symbol for the number of bits */ 744 dc_counts[nbits]++; 745 746 /* Encode the AC coefficients per section F.1.2.2 */ 747 748 r = 0; /* r = run length of zeros */ 749 750 for (k = 1; k < DCTSIZE2; k++) { 751 if ((temp = block[jpeg_natural_order[k]]) == 0) { 752 r++; 753 } else { 754 /* if run length > 15, must emit special run-length-16 codes (0xF0) */ 755 while (r > 15) { 756 ac_counts[0xF0]++; 757 r -= 16; 758 } 759 760 /* Find the number of bits needed for the magnitude of the coefficient */ 761 if (temp < 0) 762 temp = -temp; 763 764 /* Find the number of bits needed for the magnitude of the coefficient */ 765 nbits = 1; /* there must be at least one 1 bit */ 766 while ((temp >>= 1)) 767 nbits++; 768 /* Check for out-of-range coefficient values */ 769 if (nbits > MAX_COEF_BITS) 770 ERREXIT(cinfo, JERR_BAD_DCT_COEF); 771 772 /* Count Huffman symbol for run length / number of bits */ 773 ac_counts[(r << 4) + nbits]++; 774 775 r = 0; 776 } 777 } 778 779 /* If the last coef(s) were zero, emit an end-of-block code */ 780 if (r > 0) 781 ac_counts[0]++; 782 } 783 784 785 /* 786 * Trial-encode one MCU's worth of Huffman-compressed coefficients. 787 * No data is actually output, so no suspension return is possible. 788 */ 789 790 METHODDEF(boolean) 791 encode_mcu_gather (j_compress_ptr cinfo, JBLOCKROW *MCU_data) 792 { 793 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; 794 int blkn, ci; 795 jpeg_component_info * compptr; 796 797 /* Take care of restart intervals if needed */ 798 if (cinfo->restart_interval) { 799 if (entropy->restarts_to_go == 0) { 800 /* Re-initialize DC predictions to 0 */ 801 for (ci = 0; ci < cinfo->comps_in_scan; ci++) 802 entropy->saved.last_dc_val[ci] = 0; 803 /* Update restart state */ 804 entropy->restarts_to_go = cinfo->restart_interval; 805 } 806 entropy->restarts_to_go--; 807 } 808 809 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { 810 ci = cinfo->MCU_membership[blkn]; 811 compptr = cinfo->cur_comp_info[ci]; 812 htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci], 813 entropy->dc_count_ptrs[compptr->dc_tbl_no], 814 entropy->ac_count_ptrs[compptr->ac_tbl_no]); 815 entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0]; 816 } 817 818 return TRUE; 819 } 820 821 822 /* 823 * Generate the best Huffman code table for the given counts, fill htbl. 824 * Note this is also used by jcphuff.c. 825 * 826 * The JPEG standard requires that no symbol be assigned a codeword of all 827 * one bits (so that padding bits added at the end of a compressed segment 828 * can't look like a valid code). Because of the canonical ordering of 829 * codewords, this just means that there must be an unused slot in the 830 * longest codeword length category. Section K.2 of the JPEG spec suggests 831 * reserving such a slot by pretending that symbol 256 is a valid symbol 832 * with count 1. In theory that's not optimal; giving it count zero but 833 * including it in the symbol set anyway should give a better Huffman code. 834 * But the theoretically better code actually seems to come out worse in 835 * practice, because it produces more all-ones bytes (which incur stuffed 836 * zero bytes in the final file). In any case the difference is tiny. 837 * 838 * The JPEG standard requires Huffman codes to be no more than 16 bits long. 839 * If some symbols have a very small but nonzero probability, the Huffman tree 840 * must be adjusted to meet the code length restriction. We currently use 841 * the adjustment method suggested in JPEG section K.2. This method is *not* 842 * optimal; it may not choose the best possible limited-length code. But 843 * typically only very-low-frequency symbols will be given less-than-optimal 844 * lengths, so the code is almost optimal. Experimental comparisons against 845 * an optimal limited-length-code algorithm indicate that the difference is 846 * microscopic --- usually less than a hundredth of a percent of total size. 847 * So the extra complexity of an optimal algorithm doesn't seem worthwhile. 848 */ 849 850 GLOBAL(void) 851 jpeg_gen_optimal_table (j_compress_ptr cinfo, JHUFF_TBL * htbl, long freq[]) 852 { 853 #define MAX_CLEN 32 /* assumed maximum initial code length */ 854 UINT8 bits[MAX_CLEN+1]; /* bits[k] = # of symbols with code length k */ 855 int codesize[257]; /* codesize[k] = code length of symbol k */ 856 int others[257]; /* next symbol in current branch of tree */ 857 int c1, c2; 858 int p, i, j; 859 long v; 860 861 /* This algorithm is explained in section K.2 of the JPEG standard */ 862 863 MEMZERO(bits, SIZEOF(bits)); 864 MEMZERO(codesize, SIZEOF(codesize)); 865 for (i = 0; i < 257; i++) 866 others[i] = -1; /* init links to empty */ 867 868 freq[256] = 1; /* make sure 256 has a nonzero count */ 869 /* Including the pseudo-symbol 256 in the Huffman procedure guarantees 870 * that no real symbol is given code-value of all ones, because 256 871 * will be placed last in the largest codeword category. 872 */ 873 874 /* Huffman's basic algorithm to assign optimal code lengths to symbols */ 875 876 for (;;) { 877 /* Find the smallest nonzero frequency, set c1 = its symbol */ 878 /* In case of ties, take the larger symbol number */ 879 c1 = -1; 880 v = 1000000000L; 881 for (i = 0; i <= 256; i++) { 882 if (freq[i] && freq[i] <= v) { 883 v = freq[i]; 884 c1 = i; 885 } 886 } 887 888 /* Find the next smallest nonzero frequency, set c2 = its symbol */ 889 /* In case of ties, take the larger symbol number */ 890 c2 = -1; 891 v = 1000000000L; 892 for (i = 0; i <= 256; i++) { 893 if (freq[i] && freq[i] <= v && i != c1) { 894 v = freq[i]; 895 c2 = i; 896 } 897 } 898 899 /* Done if we've merged everything into one frequency */ 900 if (c2 < 0) 901 break; 902 903 /* Else merge the two counts/trees */ 904 freq[c1] += freq[c2]; 905 freq[c2] = 0; 906 907 /* Increment the codesize of everything in c1's tree branch */ 908 codesize[c1]++; 909 while (others[c1] >= 0) { 910 c1 = others[c1]; 911 codesize[c1]++; 912 } 913 914 others[c1] = c2; /* chain c2 onto c1's tree branch */ 915 916 /* Increment the codesize of everything in c2's tree branch */ 917 codesize[c2]++; 918 while (others[c2] >= 0) { 919 c2 = others[c2]; 920 codesize[c2]++; 921 } 922 } 923 924 /* Now count the number of symbols of each code length */ 925 for (i = 0; i <= 256; i++) { 926 if (codesize[i]) { 927 /* The JPEG standard seems to think that this can't happen, */ 928 /* but I'm paranoid... */ 929 if (codesize[i] > MAX_CLEN) 930 ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW); 931 932 bits[codesize[i]]++; 933 } 934 } 935 936 /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure 937 * Huffman procedure assigned any such lengths, we must adjust the coding. 938 * Here is what the JPEG spec says about how this next bit works: 939 * Since symbols are paired for the longest Huffman code, the symbols are 940 * removed from this length category two at a time. The prefix for the pair 941 * (which is one bit shorter) is allocated to one of the pair; then, 942 * skipping the BITS entry for that prefix length, a code word from the next 943 * shortest nonzero BITS entry is converted into a prefix for two code words 944 * one bit longer. 945 */ 946 947 for (i = MAX_CLEN; i > 16; i--) { 948 while (bits[i] > 0) { 949 j = i - 2; /* find length of new prefix to be used */ 950 while (bits[j] == 0) 951 j--; 952 953 bits[i] -= 2; /* remove two symbols */ 954 bits[i-1]++; /* one goes in this length */ 955 bits[j+1] += 2; /* two new symbols in this length */ 956 bits[j]--; /* symbol of this length is now a prefix */ 957 } 958 } 959 960 /* Remove the count for the pseudo-symbol 256 from the largest codelength */ 961 while (bits[i] == 0) /* find largest codelength still in use */ 962 i--; 963 bits[i]--; 964 965 /* Return final symbol counts (only for lengths 0..16) */ 966 MEMCOPY(htbl->bits, bits, SIZEOF(htbl->bits)); 967 968 /* Return a list of the symbols sorted by code length */ 969 /* It's not real clear to me why we don't need to consider the codelength 970 * changes made above, but the JPEG spec seems to think this works. 971 */ 972 p = 0; 973 for (i = 1; i <= MAX_CLEN; i++) { 974 for (j = 0; j <= 255; j++) { 975 if (codesize[j] == i) { 976 htbl->huffval[p] = (UINT8) j; 977 p++; 978 } 979 } 980 } 981 982 /* Set sent_table FALSE so updated table will be written to JPEG file. */ 983 htbl->sent_table = FALSE; 984 } 985 986 987 /* 988 * Finish up a statistics-gathering pass and create the new Huffman tables. 989 */ 990 991 METHODDEF(void) 992 finish_pass_gather (j_compress_ptr cinfo) 993 { 994 huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; 995 int ci, dctbl, actbl; 996 jpeg_component_info * compptr; 997 JHUFF_TBL **htblptr; 998 boolean did_dc[NUM_HUFF_TBLS]; 999 boolean did_ac[NUM_HUFF_TBLS]; 1000 1001 /* It's important not to apply jpeg_gen_optimal_table more than once 1002 * per table, because it clobbers the input frequency counts! 1003 */ 1004 MEMZERO(did_dc, SIZEOF(did_dc)); 1005 MEMZERO(did_ac, SIZEOF(did_ac)); 1006 1007 for (ci = 0; ci < cinfo->comps_in_scan; ci++) { 1008 compptr = cinfo->cur_comp_info[ci]; 1009 dctbl = compptr->dc_tbl_no; 1010 actbl = compptr->ac_tbl_no; 1011 if (! did_dc[dctbl]) { 1012 htblptr = & cinfo->dc_huff_tbl_ptrs[dctbl]; 1013 if (*htblptr == NULL) 1014 *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); 1015 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]); 1016 did_dc[dctbl] = TRUE; 1017 } 1018 if (! did_ac[actbl]) { 1019 htblptr = & cinfo->ac_huff_tbl_ptrs[actbl]; 1020 if (*htblptr == NULL) 1021 *htblptr = jpeg_alloc_huff_table((j_common_ptr) cinfo); 1022 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]); 1023 did_ac[actbl] = TRUE; 1024 } 1025 } 1026 } 1027 1028 1029 #endif /* ENTROPY_OPT_SUPPORTED */ 1030 1031 1032 /* 1033 * Module initialization routine for Huffman entropy encoding. 1034 */ 1035 1036 GLOBAL(void) 1037 jinit_huff_encoder (j_compress_ptr cinfo) 1038 { 1039 huff_entropy_ptr entropy; 1040 int i; 1041 1042 entropy = (huff_entropy_ptr) 1043 (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, 1044 SIZEOF(huff_entropy_encoder)); 1045 cinfo->entropy = (struct jpeg_entropy_encoder *) entropy; 1046 entropy->pub.start_pass = start_pass_huff; 1047 1048 /* Mark tables unallocated */ 1049 for (i = 0; i < NUM_HUFF_TBLS; i++) { 1050 entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL; 1051 #ifdef ENTROPY_OPT_SUPPORTED 1052 entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL; 1053 #endif 1054 } 1055 } 1056