Home | History | Annotate | Download | only in libjpeg-turbo
      1 /*
      2  * jcphuff.c
      3  *
      4  * This file was part of the Independent JPEG Group's software:
      5  * Copyright (C) 1995-1997, Thomas G. Lane.
      6  * libjpeg-turbo Modifications:
      7  * Copyright (C) 2011, 2015, 2018, D. R. Commander.
      8  * Copyright (C) 2016, 2018, Matthieu Darbois.
      9  * For conditions of distribution and use, see the accompanying README.ijg
     10  * file.
     11  *
     12  * This file contains Huffman entropy encoding routines for progressive JPEG.
     13  *
     14  * We do not support output suspension in this module, since the library
     15  * currently does not allow multiple-scan files to be written with output
     16  * suspension.
     17  */
     18 
     19 #define JPEG_INTERNALS
     20 #include "jinclude.h"
     21 #include "jpeglib.h"
     22 #include "jsimd.h"
     23 #include "jconfigint.h"
     24 #include <limits.h>
     25 
     26 #ifdef HAVE_INTRIN_H
     27 #include <intrin.h>
     28 #ifdef _MSC_VER
     29 #ifdef HAVE_BITSCANFORWARD64
     30 #pragma intrinsic(_BitScanForward64)
     31 #endif
     32 #ifdef HAVE_BITSCANFORWARD
     33 #pragma intrinsic(_BitScanForward)
     34 #endif
     35 #endif
     36 #endif
     37 
     38 #ifdef C_PROGRESSIVE_SUPPORTED
     39 
     40 /*
     41  * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
     42  * used for bit counting rather than the lookup table.  This will reduce the
     43  * memory footprint by 64k, which is important for some mobile applications
     44  * that create many isolated instances of libjpeg-turbo (web browsers, for
     45  * instance.)  This may improve performance on some mobile platforms as well.
     46  * This feature is enabled by default only on ARM processors, because some x86
     47  * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
     48  * shown to have a significant performance impact even on the x86 chips that
     49  * have a fast implementation of it.  When building for ARMv6, you can
     50  * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
     51  * flags (this defines __thumb__).
     52  */
     53 
     54 /* NOTE: Both GCC and Clang define __GNUC__ */
     55 #if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
     56 #if !defined __thumb__ || defined __thumb2__
     57 #define USE_CLZ_INTRINSIC
     58 #endif
     59 #endif
     60 
     61 #ifdef USE_CLZ_INTRINSIC
     62 #define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
     63 #define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
     64 #else
     65 #include "jpeg_nbits_table.h"
     66 #define JPEG_NBITS(x)          (jpeg_nbits_table[x])
     67 #define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
     68 #endif
     69 
     70 
     71 /* Expanded entropy encoder object for progressive Huffman encoding. */
     72 
     73 typedef struct {
     74   struct jpeg_entropy_encoder pub; /* public fields */
     75 
     76   /* Pointer to routine to prepare data for encode_mcu_AC_first() */
     77   void (*AC_first_prepare) (const JCOEF *block,
     78                             const int *jpeg_natural_order_start, int Sl,
     79                             int Al, JCOEF *values, size_t *zerobits);
     80   /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
     81   int (*AC_refine_prepare) (const JCOEF *block,
     82                             const int *jpeg_natural_order_start, int Sl,
     83                             int Al, JCOEF *absvalues, size_t *bits);
     84 
     85   /* Mode flag: TRUE for optimization, FALSE for actual data output */
     86   boolean gather_statistics;
     87 
     88   /* Bit-level coding status.
     89    * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
     90    */
     91   JOCTET *next_output_byte;     /* => next byte to write in buffer */
     92   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
     93   size_t put_buffer;            /* current bit-accumulation buffer */
     94   int put_bits;                 /* # of bits now in it */
     95   j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
     96 
     97   /* Coding status for DC components */
     98   int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
     99 
    100   /* Coding status for AC components */
    101   int ac_tbl_no;                /* the table number of the single component */
    102   unsigned int EOBRUN;          /* run length of EOBs */
    103   unsigned int BE;              /* # of buffered correction bits before MCU */
    104   char *bit_buffer;             /* buffer for correction bits (1 per char) */
    105   /* packing correction bits tightly would save some space but cost time... */
    106 
    107   unsigned int restarts_to_go;  /* MCUs left in this restart interval */
    108   int next_restart_num;         /* next restart number to write (0-7) */
    109 
    110   /* Pointers to derived tables (these workspaces have image lifespan).
    111    * Since any one scan codes only DC or only AC, we only need one set
    112    * of tables, not one for DC and one for AC.
    113    */
    114   c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
    115 
    116   /* Statistics tables for optimization; again, one set is enough */
    117   long *count_ptrs[NUM_HUFF_TBLS];
    118 } phuff_entropy_encoder;
    119 
    120 typedef phuff_entropy_encoder *phuff_entropy_ptr;
    121 
    122 /* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
    123  * buffer can hold.  Larger sizes may slightly improve compression, but
    124  * 1000 is already well into the realm of overkill.
    125  * The minimum safe size is 64 bits.
    126  */
    127 
    128 #define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
    129 
    130 /* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
    131  * We assume that int right shift is unsigned if JLONG right shift is,
    132  * which should be safe.
    133  */
    134 
    135 #ifdef RIGHT_SHIFT_IS_UNSIGNED
    136 #define ISHIFT_TEMPS    int ishift_temp;
    137 #define IRIGHT_SHIFT(x, shft) \
    138   ((ishift_temp = (x)) < 0 ? \
    139    (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
    140    (ishift_temp >> (shft)))
    141 #else
    142 #define ISHIFT_TEMPS
    143 #define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
    144 #endif
    145 
    146 #define PAD(v, p)  ((v + (p) - 1) & (~((p) - 1)))
    147 
    148 /* Forward declarations */
    149 METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
    150                                        JBLOCKROW *MCU_data);
    151 METHODDEF(void) encode_mcu_AC_first_prepare
    152   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
    153    JCOEF *values, size_t *zerobits);
    154 METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
    155                                        JBLOCKROW *MCU_data);
    156 METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
    157                                         JBLOCKROW *MCU_data);
    158 METHODDEF(int) encode_mcu_AC_refine_prepare
    159   (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
    160    JCOEF *absvalues, size_t *bits);
    161 METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
    162                                         JBLOCKROW *MCU_data);
    163 METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
    164 METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
    165 
    166 
    167 /* Count bit loop zeroes */
    168 INLINE
    169 METHODDEF(int)
    170 count_zeroes(size_t *x)
    171 {
    172   int result;
    173 #if defined(HAVE_BUILTIN_CTZL)
    174   result = __builtin_ctzl(*x);
    175   *x >>= result;
    176 #elif defined(HAVE_BITSCANFORWARD64)
    177   _BitScanForward64(&result, *x);
    178   *x >>= result;
    179 #elif defined(HAVE_BITSCANFORWARD)
    180   _BitScanForward(&result, *x);
    181   *x >>= result;
    182 #else
    183   result = 0;
    184   while ((*x & 1) == 0) {
    185     ++result;
    186     *x >>= 1;
    187   }
    188 #endif
    189   return result;
    190 }
    191 
    192 
    193 /*
    194  * Initialize for a Huffman-compressed scan using progressive JPEG.
    195  */
    196 
    197 METHODDEF(void)
    198 start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
    199 {
    200   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
    201   boolean is_DC_band;
    202   int ci, tbl;
    203   jpeg_component_info *compptr;
    204 
    205   entropy->cinfo = cinfo;
    206   entropy->gather_statistics = gather_statistics;
    207 
    208   is_DC_band = (cinfo->Ss == 0);
    209 
    210   /* We assume jcmaster.c already validated the scan parameters. */
    211 
    212   /* Select execution routines */
    213   if (cinfo->Ah == 0) {
    214     if (is_DC_band)
    215       entropy->pub.encode_mcu = encode_mcu_DC_first;
    216     else
    217       entropy->pub.encode_mcu = encode_mcu_AC_first;
    218     if (jsimd_can_encode_mcu_AC_first_prepare())
    219       entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
    220     else
    221       entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
    222   } else {
    223     if (is_DC_band)
    224       entropy->pub.encode_mcu = encode_mcu_DC_refine;
    225     else {
    226       entropy->pub.encode_mcu = encode_mcu_AC_refine;
    227       if (jsimd_can_encode_mcu_AC_refine_prepare())
    228         entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
    229       else
    230         entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
    231       /* AC refinement needs a correction bit buffer */
    232       if (entropy->bit_buffer == NULL)
    233         entropy->bit_buffer = (char *)
    234           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
    235                                       MAX_CORR_BITS * sizeof(char));
    236     }
    237   }
    238   if (gather_statistics)
    239     entropy->pub.finish_pass = finish_pass_gather_phuff;
    240   else
    241     entropy->pub.finish_pass = finish_pass_phuff;
    242 
    243   /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
    244    * for AC coefficients.
    245    */
    246   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    247     compptr = cinfo->cur_comp_info[ci];
    248     /* Initialize DC predictions to 0 */
    249     entropy->last_dc_val[ci] = 0;
    250     /* Get table index */
    251     if (is_DC_band) {
    252       if (cinfo->Ah != 0)       /* DC refinement needs no table */
    253         continue;
    254       tbl = compptr->dc_tbl_no;
    255     } else {
    256       entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
    257     }
    258     if (gather_statistics) {
    259       /* Check for invalid table index */
    260       /* (make_c_derived_tbl does this in the other path) */
    261       if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
    262         ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
    263       /* Allocate and zero the statistics tables */
    264       /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
    265       if (entropy->count_ptrs[tbl] == NULL)
    266         entropy->count_ptrs[tbl] = (long *)
    267           (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
    268                                       257 * sizeof(long));
    269       MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
    270     } else {
    271       /* Compute derived values for Huffman table */
    272       /* We may do this more than once for a table, but it's not expensive */
    273       jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
    274                               &entropy->derived_tbls[tbl]);
    275     }
    276   }
    277 
    278   /* Initialize AC stuff */
    279   entropy->EOBRUN = 0;
    280   entropy->BE = 0;
    281 
    282   /* Initialize bit buffer to empty */
    283   entropy->put_buffer = 0;
    284   entropy->put_bits = 0;
    285 
    286   /* Initialize restart stuff */
    287   entropy->restarts_to_go = cinfo->restart_interval;
    288   entropy->next_restart_num = 0;
    289 }
    290 
    291 
    292 /* Outputting bytes to the file.
    293  * NB: these must be called only when actually outputting,
    294  * that is, entropy->gather_statistics == FALSE.
    295  */
    296 
    297 /* Emit a byte */
    298 #define emit_byte(entropy, val) { \
    299   *(entropy)->next_output_byte++ = (JOCTET)(val); \
    300   if (--(entropy)->free_in_buffer == 0) \
    301     dump_buffer(entropy); \
    302 }
    303 
    304 
    305 LOCAL(void)
    306 dump_buffer(phuff_entropy_ptr entropy)
    307 /* Empty the output buffer; we do not support suspension in this module. */
    308 {
    309   struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
    310 
    311   if (!(*dest->empty_output_buffer) (entropy->cinfo))
    312     ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
    313   /* After a successful buffer dump, must reset buffer pointers */
    314   entropy->next_output_byte = dest->next_output_byte;
    315   entropy->free_in_buffer = dest->free_in_buffer;
    316 }
    317 
    318 
    319 /* Outputting bits to the file */
    320 
    321 /* Only the right 24 bits of put_buffer are used; the valid bits are
    322  * left-justified in this part.  At most 16 bits can be passed to emit_bits
    323  * in one call, and we never retain more than 7 bits in put_buffer
    324  * between calls, so 24 bits are sufficient.
    325  */
    326 
    327 LOCAL(void)
    328 emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
    329 /* Emit some bits, unless we are in gather mode */
    330 {
    331   /* This routine is heavily used, so it's worth coding tightly. */
    332   register size_t put_buffer = (size_t)code;
    333   register int put_bits = entropy->put_bits;
    334 
    335   /* if size is 0, caller used an invalid Huffman table entry */
    336   if (size == 0)
    337     ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
    338 
    339   if (entropy->gather_statistics)
    340     return;                     /* do nothing if we're only getting stats */
    341 
    342   put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
    343 
    344   put_bits += size;             /* new number of bits in buffer */
    345 
    346   put_buffer <<= 24 - put_bits; /* align incoming bits */
    347 
    348   put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
    349 
    350   while (put_bits >= 8) {
    351     int c = (int)((put_buffer >> 16) & 0xFF);
    352 
    353     emit_byte(entropy, c);
    354     if (c == 0xFF) {            /* need to stuff a zero byte? */
    355       emit_byte(entropy, 0);
    356     }
    357     put_buffer <<= 8;
    358     put_bits -= 8;
    359   }
    360 
    361   entropy->put_buffer = put_buffer; /* update variables */
    362   entropy->put_bits = put_bits;
    363 }
    364 
    365 
    366 LOCAL(void)
    367 flush_bits(phuff_entropy_ptr entropy)
    368 {
    369   emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
    370   entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
    371   entropy->put_bits = 0;
    372 }
    373 
    374 
    375 /*
    376  * Emit (or just count) a Huffman symbol.
    377  */
    378 
    379 LOCAL(void)
    380 emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
    381 {
    382   if (entropy->gather_statistics)
    383     entropy->count_ptrs[tbl_no][symbol]++;
    384   else {
    385     c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
    386     emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
    387   }
    388 }
    389 
    390 
    391 /*
    392  * Emit bits from a correction bit buffer.
    393  */
    394 
    395 LOCAL(void)
    396 emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
    397                    unsigned int nbits)
    398 {
    399   if (entropy->gather_statistics)
    400     return;                     /* no real work */
    401 
    402   while (nbits > 0) {
    403     emit_bits(entropy, (unsigned int)(*bufstart), 1);
    404     bufstart++;
    405     nbits--;
    406   }
    407 }
    408 
    409 
    410 /*
    411  * Emit any pending EOBRUN symbol.
    412  */
    413 
    414 LOCAL(void)
    415 emit_eobrun(phuff_entropy_ptr entropy)
    416 {
    417   register int temp, nbits;
    418 
    419   if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
    420     temp = entropy->EOBRUN;
    421     nbits = JPEG_NBITS_NONZERO(temp) - 1;
    422     /* safety check: shouldn't happen given limited correction-bit buffer */
    423     if (nbits > 14)
    424       ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
    425 
    426     emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
    427     if (nbits)
    428       emit_bits(entropy, entropy->EOBRUN, nbits);
    429 
    430     entropy->EOBRUN = 0;
    431 
    432     /* Emit any buffered correction bits */
    433     emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
    434     entropy->BE = 0;
    435   }
    436 }
    437 
    438 
    439 /*
    440  * Emit a restart marker & resynchronize predictions.
    441  */
    442 
    443 LOCAL(void)
    444 emit_restart(phuff_entropy_ptr entropy, int restart_num)
    445 {
    446   int ci;
    447 
    448   emit_eobrun(entropy);
    449 
    450   if (!entropy->gather_statistics) {
    451     flush_bits(entropy);
    452     emit_byte(entropy, 0xFF);
    453     emit_byte(entropy, JPEG_RST0 + restart_num);
    454   }
    455 
    456   if (entropy->cinfo->Ss == 0) {
    457     /* Re-initialize DC predictions to 0 */
    458     for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
    459       entropy->last_dc_val[ci] = 0;
    460   } else {
    461     /* Re-initialize all AC-related fields to 0 */
    462     entropy->EOBRUN = 0;
    463     entropy->BE = 0;
    464   }
    465 }
    466 
    467 
    468 /*
    469  * MCU encoding for DC initial scan (either spectral selection,
    470  * or first pass of successive approximation).
    471  */
    472 
    473 METHODDEF(boolean)
    474 encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
    475 {
    476   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
    477   register int temp, temp2, temp3;
    478   register int nbits;
    479   int blkn, ci;
    480   int Al = cinfo->Al;
    481   JBLOCKROW block;
    482   jpeg_component_info *compptr;
    483   ISHIFT_TEMPS
    484 
    485   entropy->next_output_byte = cinfo->dest->next_output_byte;
    486   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
    487 
    488   /* Emit restart marker if needed */
    489   if (cinfo->restart_interval)
    490     if (entropy->restarts_to_go == 0)
    491       emit_restart(entropy, entropy->next_restart_num);
    492 
    493   /* Encode the MCU data blocks */
    494   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    495     block = MCU_data[blkn];
    496     ci = cinfo->MCU_membership[blkn];
    497     compptr = cinfo->cur_comp_info[ci];
    498 
    499     /* Compute the DC value after the required point transform by Al.
    500      * This is simply an arithmetic right shift.
    501      */
    502     temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
    503 
    504     /* DC differences are figured on the point-transformed values. */
    505     temp = temp2 - entropy->last_dc_val[ci];
    506     entropy->last_dc_val[ci] = temp2;
    507 
    508     /* Encode the DC coefficient difference per section G.1.2.1 */
    509 
    510     /* This is a well-known technique for obtaining the absolute value without
    511      * a branch.  It is derived from an assembly language technique presented
    512      * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
    513      * 1997 by Agner Fog.
    514      */
    515     temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
    516     temp ^= temp3;
    517     temp -= temp3;              /* temp is abs value of input */
    518     /* For a negative input, want temp2 = bitwise complement of abs(input) */
    519     temp2 = temp ^ temp3;
    520 
    521     /* Find the number of bits needed for the magnitude of the coefficient */
    522     nbits = JPEG_NBITS(temp);
    523     /* Check for out-of-range coefficient values.
    524      * Since we're encoding a difference, the range limit is twice as much.
    525      */
    526     if (nbits > MAX_COEF_BITS + 1)
    527       ERREXIT(cinfo, JERR_BAD_DCT_COEF);
    528 
    529     /* Count/emit the Huffman-coded symbol for the number of bits */
    530     emit_symbol(entropy, compptr->dc_tbl_no, nbits);
    531 
    532     /* Emit that number of bits of the value, if positive, */
    533     /* or the complement of its magnitude, if negative. */
    534     if (nbits)                  /* emit_bits rejects calls with size 0 */
    535       emit_bits(entropy, (unsigned int)temp2, nbits);
    536   }
    537 
    538   cinfo->dest->next_output_byte = entropy->next_output_byte;
    539   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
    540 
    541   /* Update restart-interval state too */
    542   if (cinfo->restart_interval) {
    543     if (entropy->restarts_to_go == 0) {
    544       entropy->restarts_to_go = cinfo->restart_interval;
    545       entropy->next_restart_num++;
    546       entropy->next_restart_num &= 7;
    547     }
    548     entropy->restarts_to_go--;
    549   }
    550 
    551   return TRUE;
    552 }
    553 
    554 
    555 /*
    556  * Data preparation for encode_mcu_AC_first().
    557  */
    558 
    559 #define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
    560   for (k = 0; k < Sl; k++) { \
    561     temp = block[jpeg_natural_order_start[k]]; \
    562     if (temp == 0) \
    563       continue; \
    564     /* We must apply the point transform by Al.  For AC coefficients this \
    565      * is an integer division with rounding towards 0.  To do this portably \
    566      * in C, we shift after obtaining the absolute value; so the code is \
    567      * interwoven with finding the abs value (temp) and output bits (temp2). \
    568      */ \
    569     temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    570     temp ^= temp2; \
    571     temp -= temp2;              /* temp is abs value of input */ \
    572     temp >>= Al;                /* apply the point transform */ \
    573     /* Watch out for case that nonzero coef is zero after point transform */ \
    574     if (temp == 0) \
    575       continue; \
    576     /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    577     temp2 ^= temp; \
    578     values[k] = temp; \
    579     values[k + DCTSIZE2] = temp2; \
    580     zerobits |= ((size_t)1U) << k; \
    581   } \
    582 }
    583 
    584 METHODDEF(void)
    585 encode_mcu_AC_first_prepare(const JCOEF *block,
    586                             const int *jpeg_natural_order_start, int Sl,
    587                             int Al, JCOEF *values, size_t *bits)
    588 {
    589   register int k, temp, temp2;
    590   size_t zerobits = 0U;
    591   int Sl0 = Sl;
    592 
    593 #if SIZEOF_SIZE_T == 4
    594   if (Sl0 > 32)
    595     Sl0 = 32;
    596 #endif
    597 
    598   COMPUTE_ABSVALUES_AC_FIRST(Sl0);
    599 
    600   bits[0] = zerobits;
    601 #if SIZEOF_SIZE_T == 4
    602   zerobits = 0U;
    603 
    604   if (Sl > 32) {
    605     Sl -= 32;
    606     jpeg_natural_order_start += 32;
    607     values += 32;
    608 
    609     COMPUTE_ABSVALUES_AC_FIRST(Sl);
    610   }
    611   bits[1] = zerobits;
    612 #endif
    613 }
    614 
    615 /*
    616  * MCU encoding for AC initial scan (either spectral selection,
    617  * or first pass of successive approximation).
    618  */
    619 
    620 #define ENCODE_COEFS_AC_FIRST(label) { \
    621   while (zerobits) { \
    622     r = count_zeroes(&zerobits); \
    623     cvalue += r; \
    624 label \
    625     temp  = cvalue[0]; \
    626     temp2 = cvalue[DCTSIZE2]; \
    627     \
    628     /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    629     while (r > 15) { \
    630       emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
    631       r -= 16; \
    632     } \
    633     \
    634     /* Find the number of bits needed for the magnitude of the coefficient */ \
    635     nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    636     /* Check for out-of-range coefficient values */ \
    637     if (nbits > MAX_COEF_BITS) \
    638       ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    639     \
    640     /* Count/emit Huffman symbol for run length / number of bits */ \
    641     emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    642     \
    643     /* Emit that number of bits of the value, if positive, */ \
    644     /* or the complement of its magnitude, if negative. */ \
    645     emit_bits(entropy, (unsigned int)temp2, nbits); \
    646     \
    647     cvalue++; \
    648     zerobits >>= 1; \
    649   } \
    650 }
    651 
    652 METHODDEF(boolean)
    653 encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
    654 {
    655   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
    656   register int temp, temp2;
    657   register int nbits, r;
    658   int Sl = cinfo->Se - cinfo->Ss + 1;
    659   int Al = cinfo->Al;
    660   JCOEF values_unaligned[2 * DCTSIZE2 + 15];
    661   JCOEF *values;
    662   const JCOEF *cvalue;
    663   size_t zerobits;
    664   size_t bits[8 / SIZEOF_SIZE_T];
    665 
    666   entropy->next_output_byte = cinfo->dest->next_output_byte;
    667   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
    668 
    669   /* Emit restart marker if needed */
    670   if (cinfo->restart_interval)
    671     if (entropy->restarts_to_go == 0)
    672       emit_restart(entropy, entropy->next_restart_num);
    673 
    674 #ifdef WITH_SIMD
    675   cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
    676 #else
    677   /* Not using SIMD, so alignment is not needed */
    678   cvalue = values = values_unaligned;
    679 #endif
    680 
    681   /* Prepare data */
    682   entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
    683                             Sl, Al, values, bits);
    684 
    685   zerobits = bits[0];
    686 #if SIZEOF_SIZE_T == 4
    687   zerobits |= bits[1];
    688 #endif
    689 
    690   /* Emit any pending EOBRUN */
    691   if (zerobits && (entropy->EOBRUN > 0))
    692     emit_eobrun(entropy);
    693 
    694 #if SIZEOF_SIZE_T == 4
    695   zerobits = bits[0];
    696 #endif
    697 
    698   /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
    699 
    700   ENCODE_COEFS_AC_FIRST((void)0;);
    701 
    702 #if SIZEOF_SIZE_T == 4
    703   zerobits = bits[1];
    704   if (zerobits) {
    705     int diff = ((values + DCTSIZE2 / 2) - cvalue);
    706     r = count_zeroes(&zerobits);
    707     r += diff;
    708     cvalue += r;
    709     goto first_iter_ac_first;
    710   }
    711 
    712   ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
    713 #endif
    714 
    715   if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
    716     entropy->EOBRUN++;          /* count an EOB */
    717     if (entropy->EOBRUN == 0x7FFF)
    718       emit_eobrun(entropy);     /* force it out to avoid overflow */
    719   }
    720 
    721   cinfo->dest->next_output_byte = entropy->next_output_byte;
    722   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
    723 
    724   /* Update restart-interval state too */
    725   if (cinfo->restart_interval) {
    726     if (entropy->restarts_to_go == 0) {
    727       entropy->restarts_to_go = cinfo->restart_interval;
    728       entropy->next_restart_num++;
    729       entropy->next_restart_num &= 7;
    730     }
    731     entropy->restarts_to_go--;
    732   }
    733 
    734   return TRUE;
    735 }
    736 
    737 
    738 /*
    739  * MCU encoding for DC successive approximation refinement scan.
    740  * Note: we assume such scans can be multi-component, although the spec
    741  * is not very clear on the point.
    742  */
    743 
    744 METHODDEF(boolean)
    745 encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
    746 {
    747   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
    748   register int temp;
    749   int blkn;
    750   int Al = cinfo->Al;
    751   JBLOCKROW block;
    752 
    753   entropy->next_output_byte = cinfo->dest->next_output_byte;
    754   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
    755 
    756   /* Emit restart marker if needed */
    757   if (cinfo->restart_interval)
    758     if (entropy->restarts_to_go == 0)
    759       emit_restart(entropy, entropy->next_restart_num);
    760 
    761   /* Encode the MCU data blocks */
    762   for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
    763     block = MCU_data[blkn];
    764 
    765     /* We simply emit the Al'th bit of the DC coefficient value. */
    766     temp = (*block)[0];
    767     emit_bits(entropy, (unsigned int)(temp >> Al), 1);
    768   }
    769 
    770   cinfo->dest->next_output_byte = entropy->next_output_byte;
    771   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
    772 
    773   /* Update restart-interval state too */
    774   if (cinfo->restart_interval) {
    775     if (entropy->restarts_to_go == 0) {
    776       entropy->restarts_to_go = cinfo->restart_interval;
    777       entropy->next_restart_num++;
    778       entropy->next_restart_num &= 7;
    779     }
    780     entropy->restarts_to_go--;
    781   }
    782 
    783   return TRUE;
    784 }
    785 
    786 
    787 /*
    788  * Data preparation for encode_mcu_AC_refine().
    789  */
    790 
    791 #define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
    792   /* It is convenient to make a pre-pass to determine the transformed \
    793    * coefficients' absolute values and the EOB position. \
    794    */ \
    795   for (k = 0; k < Sl; k++) { \
    796     temp = block[jpeg_natural_order_start[k]]; \
    797     /* We must apply the point transform by Al.  For AC coefficients this \
    798      * is an integer division with rounding towards 0.  To do this portably \
    799      * in C, we shift after obtaining the absolute value. \
    800      */ \
    801     temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    802     temp ^= temp2; \
    803     temp -= temp2;              /* temp is abs value of input */ \
    804     temp >>= Al;                /* apply the point transform */ \
    805     if (temp != 0) { \
    806       zerobits |= ((size_t)1U) << k; \
    807       signbits |= ((size_t)(temp2 + 1)) << k; \
    808     } \
    809     absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
    810     if (temp == 1) \
    811       EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
    812   } \
    813 }
    814 
    815 METHODDEF(int)
    816 encode_mcu_AC_refine_prepare(const JCOEF *block,
    817                              const int *jpeg_natural_order_start, int Sl,
    818                              int Al, JCOEF *absvalues, size_t *bits)
    819 {
    820   register int k, temp, temp2;
    821   int EOB = 0;
    822   size_t zerobits = 0U, signbits = 0U;
    823   int Sl0 = Sl;
    824 
    825 #if SIZEOF_SIZE_T == 4
    826   if (Sl0 > 32)
    827     Sl0 = 32;
    828 #endif
    829 
    830   COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);
    831 
    832   bits[0] = zerobits;
    833 #if SIZEOF_SIZE_T == 8
    834   bits[1] = signbits;
    835 #else
    836   bits[2] = signbits;
    837 
    838   zerobits = 0U;
    839   signbits = 0U;
    840 
    841   if (Sl > 32) {
    842     Sl -= 32;
    843     jpeg_natural_order_start += 32;
    844     absvalues += 32;
    845 
    846     COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
    847   }
    848 
    849   bits[1] = zerobits;
    850   bits[3] = signbits;
    851 #endif
    852 
    853   return EOB;
    854 }
    855 
    856 
    857 /*
    858  * MCU encoding for AC successive approximation refinement scan.
    859  */
    860 
    861 #define ENCODE_COEFS_AC_REFINE(label) { \
    862   while (zerobits) { \
    863     int idx = count_zeroes(&zerobits); \
    864     r += idx; \
    865     cabsvalue += idx; \
    866     signbits >>= idx; \
    867 label \
    868     /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    869     while (r > 15 && (cabsvalue <= EOBPTR)) { \
    870       /* emit any pending EOBRUN and the BE correction bits */ \
    871       emit_eobrun(entropy); \
    872       /* Emit ZRL */ \
    873       emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
    874       r -= 16; \
    875       /* Emit buffered correction bits that must be associated with ZRL */ \
    876       emit_buffered_bits(entropy, BR_buffer, BR); \
    877       BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    878       BR = 0; \
    879     } \
    880     \
    881     temp = *cabsvalue++; \
    882     \
    883     /* If the coef was previously nonzero, it only needs a correction bit. \
    884      * NOTE: a straight translation of the spec's figure G.7 would suggest \
    885      * that we also need to test r > 15.  But if r > 15, we can only get here \
    886      * if k > EOB, which implies that this coefficient is not 1. \
    887      */ \
    888     if (temp > 1) { \
    889       /* The correction bit is the next bit of the absolute value. */ \
    890       BR_buffer[BR++] = (char)(temp & 1); \
    891       signbits >>= 1; \
    892       zerobits >>= 1; \
    893       continue; \
    894     } \
    895     \
    896     /* Emit any pending EOBRUN and the BE correction bits */ \
    897     emit_eobrun(entropy); \
    898     \
    899     /* Count/emit Huffman symbol for run length / number of bits */ \
    900     emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    901     \
    902     /* Emit output bit for newly-nonzero coef */ \
    903     temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    904     emit_bits(entropy, (unsigned int)temp, 1); \
    905     \
    906     /* Emit buffered correction bits that must be associated with this code */ \
    907     emit_buffered_bits(entropy, BR_buffer, BR); \
    908     BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    909     BR = 0; \
    910     r = 0;                      /* reset zero run length */ \
    911     signbits >>= 1; \
    912     zerobits >>= 1; \
    913   } \
    914 }
    915 
    916 METHODDEF(boolean)
    917 encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
    918 {
    919   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
    920   register int temp, r;
    921   char *BR_buffer;
    922   unsigned int BR;
    923   int Sl = cinfo->Se - cinfo->Ss + 1;
    924   int Al = cinfo->Al;
    925   JCOEF absvalues_unaligned[DCTSIZE2 + 15];
    926   JCOEF *absvalues;
    927   const JCOEF *cabsvalue, *EOBPTR;
    928   size_t zerobits, signbits;
    929   size_t bits[16 / SIZEOF_SIZE_T];
    930 
    931   entropy->next_output_byte = cinfo->dest->next_output_byte;
    932   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
    933 
    934   /* Emit restart marker if needed */
    935   if (cinfo->restart_interval)
    936     if (entropy->restarts_to_go == 0)
    937       emit_restart(entropy, entropy->next_restart_num);
    938 
    939 #ifdef WITH_SIMD
    940   cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
    941 #else
    942   /* Not using SIMD, so alignment is not needed */
    943   cabsvalue = absvalues = absvalues_unaligned;
    944 #endif
    945 
    946   /* Prepare data */
    947   EOBPTR = absvalues +
    948     entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
    949                                Sl, Al, absvalues, bits);
    950 
    951   /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
    952 
    953   r = 0;                        /* r = run length of zeros */
    954   BR = 0;                       /* BR = count of buffered bits added now */
    955   BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
    956 
    957   zerobits = bits[0];
    958 #if SIZEOF_SIZE_T == 8
    959   signbits = bits[1];
    960 #else
    961   signbits = bits[2];
    962 #endif
    963   ENCODE_COEFS_AC_REFINE((void)0;);
    964 
    965 #if SIZEOF_SIZE_T == 4
    966   zerobits = bits[1];
    967   signbits = bits[3];
    968 
    969   if (zerobits) {
    970     int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
    971     int idx = count_zeroes(&zerobits);
    972     signbits >>= idx;
    973     idx += diff;
    974     r += idx;
    975     cabsvalue += idx;
    976     goto first_iter_ac_refine;
    977   }
    978 
    979   ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
    980 #endif
    981 
    982   r |= (int)((absvalues + Sl) - cabsvalue);
    983 
    984   if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
    985     entropy->EOBRUN++;          /* count an EOB */
    986     entropy->BE += BR;          /* concat my correction bits to older ones */
    987     /* We force out the EOB if we risk either:
    988      * 1. overflow of the EOB counter;
    989      * 2. overflow of the correction bit buffer during the next MCU.
    990      */
    991     if (entropy->EOBRUN == 0x7FFF ||
    992         entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
    993       emit_eobrun(entropy);
    994   }
    995 
    996   cinfo->dest->next_output_byte = entropy->next_output_byte;
    997   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
    998 
    999   /* Update restart-interval state too */
   1000   if (cinfo->restart_interval) {
   1001     if (entropy->restarts_to_go == 0) {
   1002       entropy->restarts_to_go = cinfo->restart_interval;
   1003       entropy->next_restart_num++;
   1004       entropy->next_restart_num &= 7;
   1005     }
   1006     entropy->restarts_to_go--;
   1007   }
   1008 
   1009   return TRUE;
   1010 }
   1011 
   1012 
   1013 /*
   1014  * Finish up at the end of a Huffman-compressed progressive scan.
   1015  */
   1016 
   1017 METHODDEF(void)
   1018 finish_pass_phuff(j_compress_ptr cinfo)
   1019 {
   1020   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
   1021 
   1022   entropy->next_output_byte = cinfo->dest->next_output_byte;
   1023   entropy->free_in_buffer = cinfo->dest->free_in_buffer;
   1024 
   1025   /* Flush out any buffered data */
   1026   emit_eobrun(entropy);
   1027   flush_bits(entropy);
   1028 
   1029   cinfo->dest->next_output_byte = entropy->next_output_byte;
   1030   cinfo->dest->free_in_buffer = entropy->free_in_buffer;
   1031 }
   1032 
   1033 
   1034 /*
   1035  * Finish up a statistics-gathering pass and create the new Huffman tables.
   1036  */
   1037 
   1038 METHODDEF(void)
   1039 finish_pass_gather_phuff(j_compress_ptr cinfo)
   1040 {
   1041   phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
   1042   boolean is_DC_band;
   1043   int ci, tbl;
   1044   jpeg_component_info *compptr;
   1045   JHUFF_TBL **htblptr;
   1046   boolean did[NUM_HUFF_TBLS];
   1047 
   1048   /* Flush out buffered data (all we care about is counting the EOB symbol) */
   1049   emit_eobrun(entropy);
   1050 
   1051   is_DC_band = (cinfo->Ss == 0);
   1052 
   1053   /* It's important not to apply jpeg_gen_optimal_table more than once
   1054    * per table, because it clobbers the input frequency counts!
   1055    */
   1056   MEMZERO(did, sizeof(did));
   1057 
   1058   for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
   1059     compptr = cinfo->cur_comp_info[ci];
   1060     if (is_DC_band) {
   1061       if (cinfo->Ah != 0)       /* DC refinement needs no table */
   1062         continue;
   1063       tbl = compptr->dc_tbl_no;
   1064     } else {
   1065       tbl = compptr->ac_tbl_no;
   1066     }
   1067     if (!did[tbl]) {
   1068       if (is_DC_band)
   1069         htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
   1070       else
   1071         htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
   1072       if (*htblptr == NULL)
   1073         *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
   1074       jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
   1075       did[tbl] = TRUE;
   1076     }
   1077   }
   1078 }
   1079 
   1080 
   1081 /*
   1082  * Module initialization routine for progressive Huffman entropy encoding.
   1083  */
   1084 
   1085 GLOBAL(void)
   1086 jinit_phuff_encoder(j_compress_ptr cinfo)
   1087 {
   1088   phuff_entropy_ptr entropy;
   1089   int i;
   1090 
   1091   entropy = (phuff_entropy_ptr)
   1092     (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
   1093                                 sizeof(phuff_entropy_encoder));
   1094   cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
   1095   entropy->pub.start_pass = start_pass_phuff;
   1096 
   1097   /* Mark tables unallocated */
   1098   for (i = 0; i < NUM_HUFF_TBLS; i++) {
   1099     entropy->derived_tbls[i] = NULL;
   1100     entropy->count_ptrs[i] = NULL;
   1101   }
   1102   entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
   1103 }
   1104 
   1105 #endif /* C_PROGRESSIVE_SUPPORTED */
   1106