Home | History | Annotate | Download | only in libFLAC
      1 /* libFLAC - Free Lossless Audio Codec library
      2  * Copyright (C) 2000,2001,2002,2003,2004,2005,2006,2007  Josh Coalson
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * - Redistributions of source code must retain the above copyright
      9  * notice, this list of conditions and the following disclaimer.
     10  *
     11  * - Redistributions in binary form must reproduce the above copyright
     12  * notice, this list of conditions and the following disclaimer in the
     13  * documentation and/or other materials provided with the distribution.
     14  *
     15  * - Neither the name of the Xiph.org Foundation nor the names of its
     16  * contributors may be used to endorse or promote products derived from
     17  * this software without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #if HAVE_CONFIG_H
     33 #  include <config.h>
     34 #endif
     35 
     36 #include <stdlib.h> /* for malloc() */
     37 #include <string.h> /* for memcpy(), memset() */
     38 #ifdef _MSC_VER
     39 #include <winsock.h> /* for ntohl() */
     40 #elif defined FLAC__SYS_DARWIN
     41 #include <machine/endian.h> /* for ntohl() */
     42 #elif defined __MINGW32__
     43 #include <winsock.h> /* for ntohl() */
     44 #else
     45 #include <netinet/in.h> /* for ntohl() */
     46 #endif
     47 #include "private/bitmath.h"
     48 #include "private/bitreader.h"
     49 #include "private/crc.h"
     50 #include "FLAC/assert.h"
     51 
/*
 * Word configuration for the bit reader.  The buffer is an array of
 * 'brword's; the typedef and the three size/mask macros below must always
 * be changed together.
 */
/* Things should be fastest when this matches the machine word size */
/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS below to match */
/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */
/*           also, some sections currently only have fast versions for 4 or 8 bytes per word */
typedef FLAC__uint32 brword;
#define FLAC__BYTES_PER_WORD 4
#define FLAC__BITS_PER_WORD 32
/* a brword with every bit set; shifted left/right elsewhere to build bit masks */
#define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */
#if WORDS_BIGENDIAN
/* host order already matches the stream order: nothing to do */
#define SWAP_BE_WORD_TO_HOST(x) (x)
#else
#ifdef _MSC_VER
/* MSVC builds use the local helper defined further down in this file */
#define SWAP_BE_WORD_TO_HOST(x) local_swap32_(x)
#else
#define SWAP_BE_WORD_TO_HOST(x) ntohl(x)
#endif
#endif
     70 /* counts the # of zero MSBs in a word */
     71 #define COUNT_ZERO_MSBS(word) ( \
     72 	(word) <= 0xffff ? \
     73 		( (word) <= 0xff? byte_to_unary_table[word] + 24 : byte_to_unary_table[(word) >> 8] + 16 ) : \
     74 		( (word) <= 0xffffff? byte_to_unary_table[word >> 16] + 8 : byte_to_unary_table[(word) >> 24] ) \
     75 )
     76 /* this alternate might be slightly faster on some systems/compilers: */
     77 #define COUNT_ZERO_MSBS2(word) ( (word) <= 0xff ? byte_to_unary_table[word] + 24 : ((word) <= 0xffff ? byte_to_unary_table[(word) >> 8] + 16 : ((word) <= 0xffffff ? byte_to_unary_table[(word) >> 16] + 8 : byte_to_unary_table[(word) >> 24])) )
     78 
     79 
/*
 * This should be at least twice as large as the largest number of words
 * required to represent any 'number' (in any encoding) you are going to
 * read.  With FLAC this is on the order of maybe a few hundred bits.
 * If the buffer is smaller than that, the decoder won't be able to read
 * in a whole number that is in a variable length encoding (e.g. Rice).
 * But to be practical it should be at least 1K bytes.
 *
 * Increase this number to decrease the number of read callbacks, at the
 * expense of using more memory.  Or decrease for the reverse effect,
 * keeping in mind the limit from the first paragraph.  The optimal size
 * also depends on the CPU cache size and other factors; some twiddling
 * may be necessary to squeeze out the best performance.
 *
 * The value below is 65536 bits expressed in words, i.e. 2048 words
 * (8192 bytes) with the 32-bit brword configured in this file.
 */
static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
     95 
     96 static const unsigned char byte_to_unary_table[] = {
     97 	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
     98 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     99 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    100 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    101 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    102 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    103 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    104 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    105 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    106 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    107 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    108 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    109 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    110 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    111 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    112 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    113 };
    114 
/* drop any min()/max() that an earlier header may have defined so the local versions below are the ones used */
/* WATCHOUT: both macros evaluate their arguments more than once; do not pass expressions with side effects */
#ifdef min
#undef min
#endif
#define min(x,y) ((x)<(y)?(x):(y))
#ifdef max
#undef max
#endif
#define max(x,y) ((x)>(y)?(x):(y))

/* adjust for compilers that can't understand using LLU suffix for uint64_t literals */
#ifdef _MSC_VER
#define FLAC__U64L(x) x
#else
#define FLAC__U64L(x) x##LLU
#endif

/* if nothing defined FLaC__INLINE before this point, make it expand to nothing */
#ifndef FLaC__INLINE
#define FLaC__INLINE
#endif
    134 
/*
 * State of one bit reader: a word buffer filled through a client read
 * callback, a read cursor (consumed_words/consumed_bits) and the running
 * CRC-16 of the data consumed so far.
 */
/* WATCHOUT: assembly routines rely on the order in which these fields are declared */
struct FLAC__BitReader {
	/* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */
	/* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */
	brword *buffer;
	unsigned capacity; /* in words */
	unsigned words; /* # of completed words in buffer */
	unsigned bytes; /* # of bytes in incomplete word at buffer[words] */
	unsigned consumed_words; /* #words ... */
	unsigned consumed_bits; /* ... + (#bits of head word) already consumed from the front of buffer */
	unsigned read_crc16; /* the running frame CRC */
	unsigned crc16_align; /* the number of bits in the current consumed word that should not be CRC'd */
	FLAC__BitReaderReadCallback read_callback; /* called to refill the buffer when more input is needed */
	void *client_data; /* passed through unchanged as the last argument of read_callback */
	FLAC__CPUInfo cpu_info; /* consulted when choosing the byte-swap routine on MSVC builds */
};
    151 
    152 #ifdef _MSC_VER
    153 /* OPT: an MSVC built-in would be better */
    154 static _inline FLAC__uint32 local_swap32_(FLAC__uint32 x)
    155 {
    156 	x = ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
    157 	return (x>>16) | (x<<16);
    158 }
    159 static void local_swap32_block_(FLAC__uint32 *start, FLAC__uint32 len)
    160 {
    161 	__asm {
    162 		mov edx, start
    163 		mov ecx, len
    164 		test ecx, ecx
    165 loop1:
    166 		jz done1
    167 		mov eax, [edx]
    168 		bswap eax
    169 		mov [edx], eax
    170 		add edx, 4
    171 		dec ecx
    172 		jmp short loop1
    173 done1:
    174 	}
    175 }
    176 #endif
    177 
    178 static FLaC__INLINE void crc16_update_word_(FLAC__BitReader *br, brword word)
    179 {
    180 	register unsigned crc = br->read_crc16;
    181 #if FLAC__BYTES_PER_WORD == 4
    182 	switch(br->crc16_align) {
    183 		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 24), crc);
    184 		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
    185 		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
    186 		case 24: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
    187 	}
    188 #elif FLAC__BYTES_PER_WORD == 8
    189 	switch(br->crc16_align) {
    190 		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 56), crc);
    191 		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 48) & 0xff), crc);
    192 		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 40) & 0xff), crc);
    193 		case 24: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 32) & 0xff), crc);
    194 		case 32: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 24) & 0xff), crc);
    195 		case 40: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
    196 		case 48: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
    197 		case 56: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
    198 	}
    199 #else
    200 	for( ; br->crc16_align < FLAC__BITS_PER_WORD; br->crc16_align += 8)
    201 		crc = FLAC__CRC16_UPDATE((unsigned)((word >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), crc);
    202 	br->read_crc16 = crc;
    203 #endif
    204 	br->crc16_align = 0;
    205 }
    206 
    207 /* would be static except it needs to be called by asm routines */
    208 FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
    209 {
    210 	unsigned start, end;
    211 	size_t bytes;
    212 	FLAC__byte *target;
    213 
    214 	/* first shift the unconsumed buffer data toward the front as much as possible */
    215 	if(br->consumed_words > 0) {
    216 		start = br->consumed_words;
    217 		end = br->words + (br->bytes? 1:0);
    218 		memmove(br->buffer, br->buffer+start, FLAC__BYTES_PER_WORD * (end - start));
    219 
    220 		br->words -= start;
    221 		br->consumed_words = 0;
    222 	}
    223 
    224 	/*
    225 	 * set the target for reading, taking into account word alignment and endianness
    226 	 */
    227 	bytes = (br->capacity - br->words) * FLAC__BYTES_PER_WORD - br->bytes;
    228 	if(bytes == 0)
    229 		return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY  */
    230 	target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes;
    231 
    232 	/* before reading, if the existing reader looks like this (say brword is 32 bits wide)
    233 	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1 (partial tail word is left-justified)
    234 	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??   (shown layed out as bytes sequentially in memory)
    235 	 *   buffer[LE]:  44 33 22 11 ?? ?? ?? 55   (?? being don't-care)
    236 	 *                               ^^-------target, bytes=3
    237 	 * on LE machines, have to byteswap the odd tail word so nothing is
    238 	 * overwritten:
    239 	 */
    240 #if WORDS_BIGENDIAN
    241 #else
    242 	if(br->bytes)
    243 		br->buffer[br->words] = SWAP_BE_WORD_TO_HOST(br->buffer[br->words]);
    244 #endif
    245 
    246 	/* now it looks like:
    247 	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1
    248 	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??
    249 	 *   buffer[LE]:  44 33 22 11 55 ?? ?? ??
    250 	 *                               ^^-------target, bytes=3
    251 	 */
    252 
    253 	/* read in the data; note that the callback may return a smaller number of bytes */
    254 	if(!br->read_callback(target, &bytes, br->client_data))
    255 		return false;
    256 
    257 	/* after reading bytes 66 77 88 99 AA BB CC DD EE FF from the client:
    258 	 *   bitstream :  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF
    259 	 *   buffer[BE]:  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF ??
    260 	 *   buffer[LE]:  44 33 22 11 55 66 77 88 99 AA BB CC DD EE FF ??
    261 	 * now have to byteswap on LE machines:
    262 	 */
    263 #if WORDS_BIGENDIAN
    264 #else
    265 	end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD;
    266 # if defined(_MSC_VER) && (FLAC__BYTES_PER_WORD == 4)
    267 	if(br->cpu_info.type == FLAC__CPUINFO_TYPE_IA32 && br->cpu_info.data.ia32.bswap) {
    268 		start = br->words;
    269 		local_swap32_block_(br->buffer + start, end - start);
    270 	}
    271 	else
    272 # endif
    273 	for(start = br->words; start < end; start++)
    274 		br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]);
    275 #endif
    276 
    277 	/* now it looks like:
    278 	 *   bitstream :  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF
    279 	 *   buffer[BE]:  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF ??
    280 	 *   buffer[LE]:  44 33 22 11 88 77 66 55 CC BB AA 99 ?? FF EE DD
    281 	 * finally we'll update the reader values:
    282 	 */
    283 	end = br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes;
    284 	br->words = end / FLAC__BYTES_PER_WORD;
    285 	br->bytes = end % FLAC__BYTES_PER_WORD;
    286 
    287 	return true;
    288 }
    289 
    290 /***********************************************************************
    291  *
    292  * Class constructor/destructor
    293  *
    294  ***********************************************************************/
    295 
    296 FLAC__BitReader *FLAC__bitreader_new(void)
    297 {
    298 	FLAC__BitReader *br = (FLAC__BitReader*)calloc(1, sizeof(FLAC__BitReader));
    299 
    300 	/* calloc() implies:
    301 		memset(br, 0, sizeof(FLAC__BitReader));
    302 		br->buffer = 0;
    303 		br->capacity = 0;
    304 		br->words = br->bytes = 0;
    305 		br->consumed_words = br->consumed_bits = 0;
    306 		br->read_callback = 0;
    307 		br->client_data = 0;
    308 	*/
    309 	return br;
    310 }
    311 
    312 void FLAC__bitreader_delete(FLAC__BitReader *br)
    313 {
    314 	FLAC__ASSERT(0 != br);
    315 
    316 	FLAC__bitreader_free(br);
    317 	free(br);
    318 }
    319 
    320 /***********************************************************************
    321  *
    322  * Public class methods
    323  *
    324  ***********************************************************************/
    325 
    326 FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__CPUInfo cpu, FLAC__BitReaderReadCallback rcb, void *cd)
    327 {
    328 	FLAC__ASSERT(0 != br);
    329 
    330 	br->words = br->bytes = 0;
    331 	br->consumed_words = br->consumed_bits = 0;
    332 	br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY;
    333 	br->buffer = (brword*)malloc(sizeof(brword) * br->capacity);
    334 	if(br->buffer == 0)
    335 		return false;
    336 	br->read_callback = rcb;
    337 	br->client_data = cd;
    338 	br->cpu_info = cpu;
    339 
    340 	return true;
    341 }
    342 
    343 void FLAC__bitreader_free(FLAC__BitReader *br)
    344 {
    345 	FLAC__ASSERT(0 != br);
    346 
    347 	if(0 != br->buffer)
    348 		free(br->buffer);
    349 	br->buffer = 0;
    350 	br->capacity = 0;
    351 	br->words = br->bytes = 0;
    352 	br->consumed_words = br->consumed_bits = 0;
    353 	br->read_callback = 0;
    354 	br->client_data = 0;
    355 }
    356 
    357 FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br)
    358 {
    359 	br->words = br->bytes = 0;
    360 	br->consumed_words = br->consumed_bits = 0;
    361 	return true;
    362 }
    363 
    364 void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out)
    365 {
    366 	unsigned i, j;
    367 	if(br == 0) {
    368 		fprintf(out, "bitreader is NULL\n");
    369 	}
    370 	else {
    371 		fprintf(out, "bitreader: capacity=%u words=%u bytes=%u consumed: words=%u, bits=%u\n", br->capacity, br->words, br->bytes, br->consumed_words, br->consumed_bits);
    372 
    373 		for(i = 0; i < br->words; i++) {
    374 			fprintf(out, "%08X: ", i);
    375 			for(j = 0; j < FLAC__BITS_PER_WORD; j++)
    376 				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
    377 					fprintf(out, ".");
    378 				else
    379 					fprintf(out, "%01u", br->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0);
    380 			fprintf(out, "\n");
    381 		}
    382 		if(br->bytes > 0) {
    383 			fprintf(out, "%08X: ", i);
    384 			for(j = 0; j < br->bytes*8; j++)
    385 				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
    386 					fprintf(out, ".");
    387 				else
    388 					fprintf(out, "%01u", br->buffer[i] & (1 << (br->bytes*8-j-1)) ? 1:0);
    389 			fprintf(out, "\n");
    390 		}
    391 	}
    392 }
    393 
    394 void FLAC__bitreader_reset_read_crc16(FLAC__BitReader *br, FLAC__uint16 seed)
    395 {
    396 	FLAC__ASSERT(0 != br);
    397 	FLAC__ASSERT(0 != br->buffer);
    398 	FLAC__ASSERT((br->consumed_bits & 7) == 0);
    399 
    400 	br->read_crc16 = (unsigned)seed;
    401 	br->crc16_align = br->consumed_bits;
    402 }
    403 
    404 FLAC__uint16 FLAC__bitreader_get_read_crc16(FLAC__BitReader *br)
    405 {
    406 	FLAC__ASSERT(0 != br);
    407 	FLAC__ASSERT(0 != br->buffer);
    408 	FLAC__ASSERT((br->consumed_bits & 7) == 0);
    409 	FLAC__ASSERT(br->crc16_align <= br->consumed_bits);
    410 
    411 	/* CRC any tail bytes in a partially-consumed word */
    412 	if(br->consumed_bits) {
    413 		const brword tail = br->buffer[br->consumed_words];
    414 		for( ; br->crc16_align < br->consumed_bits; br->crc16_align += 8)
    415 			br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16);
    416 	}
    417 	return br->read_crc16;
    418 }
    419 
    420 FLaC__INLINE FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br)
    421 {
    422 	return ((br->consumed_bits & 7) == 0);
    423 }
    424 
    425 FLaC__INLINE unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
    426 {
    427 	return 8 - (br->consumed_bits & 7);
    428 }
    429 
    430 FLaC__INLINE unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
    431 {
    432 	return (br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits;
    433 }
    434 
    435 FLaC__INLINE FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits)
    436 {
    437 	FLAC__ASSERT(0 != br);
    438 	FLAC__ASSERT(0 != br->buffer);
    439 
    440 	FLAC__ASSERT(bits <= 32);
    441 	FLAC__ASSERT((br->capacity*FLAC__BITS_PER_WORD) * 2 >= bits);
    442 	FLAC__ASSERT(br->consumed_words <= br->words);
    443 
    444 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
    445 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
    446 
    447 	if(bits == 0) { /* OPT: investigate if this can ever happen, maybe change to assertion */
    448 		*val = 0;
    449 		return true;
    450 	}
    451 
    452 	while((br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits < bits) {
    453 		if(!bitreader_read_from_client_(br))
    454 			return false;
    455 	}
    456 	if(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
    457 		/* OPT: taking out the consumed_bits==0 "else" case below might make things faster if less code allows the compiler to inline this function */
    458 		if(br->consumed_bits) {
    459 			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
    460 			const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits;
    461 			const brword word = br->buffer[br->consumed_words];
    462 			if(bits < n) {
    463 				*val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits);
    464 				br->consumed_bits += bits;
    465 				return true;
    466 			}
    467 			*val = word & (FLAC__WORD_ALL_ONES >> br->consumed_bits);
    468 			bits -= n;
    469 			crc16_update_word_(br, word);
    470 			br->consumed_words++;
    471 			br->consumed_bits = 0;
    472 			if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
    473 				*val <<= bits;
    474 				*val |= (br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits));
    475 				br->consumed_bits = bits;
    476 			}
    477 			return true;
    478 		}
    479 		else {
    480 			const brword word = br->buffer[br->consumed_words];
    481 			if(bits < FLAC__BITS_PER_WORD) {
    482 				*val = word >> (FLAC__BITS_PER_WORD-bits);
    483 				br->consumed_bits = bits;
    484 				return true;
    485 			}
    486 			/* at this point 'bits' must be == FLAC__BITS_PER_WORD; because of previous assertions, it can't be larger */
    487 			*val = word;
    488 			crc16_update_word_(br, word);
    489 			br->consumed_words++;
    490 			return true;
    491 		}
    492 	}
    493 	else {
    494 		/* in this case we're starting our read at a partial tail word;
    495 		 * the reader has guaranteed that we have at least 'bits' bits
    496 		 * available to read, which makes this case simpler.
    497 		 */
    498 		/* OPT: taking out the consumed_bits==0 "else" case below might make things faster if less code allows the compiler to inline this function */
    499 		if(br->consumed_bits) {
    500 			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
    501 			FLAC__ASSERT(br->consumed_bits + bits <= br->bytes*8);
    502 			*val = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits);
    503 			br->consumed_bits += bits;
    504 			return true;
    505 		}
    506 		else {
    507 			*val = br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits);
    508 			br->consumed_bits += bits;
    509 			return true;
    510 		}
    511 	}
    512 }
    513 
    514 FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, unsigned bits)
    515 {
    516 	/* OPT: inline raw uint32 code here, or make into a macro if possible in the .h file */
    517 	if(!FLAC__bitreader_read_raw_uint32(br, (FLAC__uint32*)val, bits))
    518 		return false;
    519 	/* sign-extend: */
    520 	*val <<= (32-bits);
    521 	*val >>= (32-bits);
    522 	return true;
    523 }
    524 
    525 FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, unsigned bits)
    526 {
    527 	FLAC__uint32 hi, lo;
    528 
    529 	if(bits > 32) {
    530 		if(!FLAC__bitreader_read_raw_uint32(br, &hi, bits-32))
    531 			return false;
    532 		if(!FLAC__bitreader_read_raw_uint32(br, &lo, 32))
    533 			return false;
    534 		*val = hi;
    535 		*val <<= 32;
    536 		*val |= lo;
    537 	}
    538 	else {
    539 		if(!FLAC__bitreader_read_raw_uint32(br, &lo, bits))
    540 			return false;
    541 		*val = lo;
    542 	}
    543 	return true;
    544 }
    545 
    546 FLaC__INLINE FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val)
    547 {
    548 	FLAC__uint32 x8, x32 = 0;
    549 
    550 	/* this doesn't need to be that fast as currently it is only used for vorbis comments */
    551 
    552 	if(!FLAC__bitreader_read_raw_uint32(br, &x32, 8))
    553 		return false;
    554 
    555 	if(!FLAC__bitreader_read_raw_uint32(br, &x8, 8))
    556 		return false;
    557 	x32 |= (x8 << 8);
    558 
    559 	if(!FLAC__bitreader_read_raw_uint32(br, &x8, 8))
    560 		return false;
    561 	x32 |= (x8 << 16);
    562 
    563 	if(!FLAC__bitreader_read_raw_uint32(br, &x8, 8))
    564 		return false;
    565 	x32 |= (x8 << 24);
    566 
    567 	*val = x32;
    568 	return true;
    569 }
    570 
    571 FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits)
    572 {
    573 	/*
    574 	 * OPT: a faster implementation is possible but probably not that useful
    575 	 * since this is only called a couple of times in the metadata readers.
    576 	 */
    577 	FLAC__ASSERT(0 != br);
    578 	FLAC__ASSERT(0 != br->buffer);
    579 
    580 	if(bits > 0) {
    581 		const unsigned n = br->consumed_bits & 7;
    582 		unsigned m;
    583 		FLAC__uint32 x;
    584 
    585 		if(n != 0) {
    586 			m = min(8-n, bits);
    587 			if(!FLAC__bitreader_read_raw_uint32(br, &x, m))
    588 				return false;
    589 			bits -= m;
    590 		}
    591 		m = bits / 8;
    592 		if(m > 0) {
    593 			if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(br, m))
    594 				return false;
    595 			bits %= 8;
    596 		}
    597 		if(bits > 0) {
    598 			if(!FLAC__bitreader_read_raw_uint32(br, &x, bits))
    599 				return false;
    600 		}
    601 	}
    602 
    603 	return true;
    604 }
    605 
    606 FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, unsigned nvals)
    607 {
    608 	FLAC__uint32 x;
    609 
    610 	FLAC__ASSERT(0 != br);
    611 	FLAC__ASSERT(0 != br->buffer);
    612 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
    613 
    614 	/* step 1: skip over partial head word to get word aligned */
    615 	while(nvals && br->consumed_bits) { /* i.e. run until we read 'nvals' bytes or we hit the end of the head word */
    616 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
    617 			return false;
    618 		nvals--;
    619 	}
    620 	if(0 == nvals)
    621 		return true;
    622 	/* step 2: skip whole words in chunks */
    623 	while(nvals >= FLAC__BYTES_PER_WORD) {
    624 		if(br->consumed_words < br->words) {
    625 			br->consumed_words++;
    626 			nvals -= FLAC__BYTES_PER_WORD;
    627 		}
    628 		else if(!bitreader_read_from_client_(br))
    629 			return false;
    630 	}
    631 	/* step 3: skip any remainder from partial tail bytes */
    632 	while(nvals) {
    633 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
    634 			return false;
    635 		nvals--;
    636 	}
    637 
    638 	return true;
    639 }
    640 
    641 FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, unsigned nvals)
    642 {
    643 	FLAC__uint32 x;
    644 
    645 	FLAC__ASSERT(0 != br);
    646 	FLAC__ASSERT(0 != br->buffer);
    647 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
    648 
    649 	/* step 1: read from partial head word to get word aligned */
    650 	while(nvals && br->consumed_bits) { /* i.e. run until we read 'nvals' bytes or we hit the end of the head word */
    651 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
    652 			return false;
    653 		*val++ = (FLAC__byte)x;
    654 		nvals--;
    655 	}
    656 	if(0 == nvals)
    657 		return true;
    658 	/* step 2: read whole words in chunks */
    659 	while(nvals >= FLAC__BYTES_PER_WORD) {
    660 		if(br->consumed_words < br->words) {
    661 			const brword word = br->buffer[br->consumed_words++];
    662 #if FLAC__BYTES_PER_WORD == 4
    663 			val[0] = (FLAC__byte)(word >> 24);
    664 			val[1] = (FLAC__byte)(word >> 16);
    665 			val[2] = (FLAC__byte)(word >> 8);
    666 			val[3] = (FLAC__byte)word;
    667 #elif FLAC__BYTES_PER_WORD == 8
    668 			val[0] = (FLAC__byte)(word >> 56);
    669 			val[1] = (FLAC__byte)(word >> 48);
    670 			val[2] = (FLAC__byte)(word >> 40);
    671 			val[3] = (FLAC__byte)(word >> 32);
    672 			val[4] = (FLAC__byte)(word >> 24);
    673 			val[5] = (FLAC__byte)(word >> 16);
    674 			val[6] = (FLAC__byte)(word >> 8);
    675 			val[7] = (FLAC__byte)word;
    676 #else
    677 			for(x = 0; x < FLAC__BYTES_PER_WORD; x++)
    678 				val[x] = (FLAC__byte)(word >> (8*(FLAC__BYTES_PER_WORD-x-1)));
    679 #endif
    680 			val += FLAC__BYTES_PER_WORD;
    681 			nvals -= FLAC__BYTES_PER_WORD;
    682 		}
    683 		else if(!bitreader_read_from_client_(br))
    684 			return false;
    685 	}
    686 	/* step 3: read any remainder from partial tail bytes */
    687 	while(nvals) {
    688 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
    689 			return false;
    690 		*val++ = (FLAC__byte)x;
    691 		nvals--;
    692 	}
    693 
    694 	return true;
    695 }
    696 
    697 FLaC__INLINE FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val)
    698 #if 0 /* slow but readable version */
    699 {
    700 	unsigned bit;
    701 
    702 	FLAC__ASSERT(0 != br);
    703 	FLAC__ASSERT(0 != br->buffer);
    704 
    705 	*val = 0;
    706 	while(1) {
    707 		if(!FLAC__bitreader_read_bit(br, &bit))
    708 			return false;
    709 		if(bit)
    710 			break;
    711 		else
    712 			*val++;
    713 	}
    714 	return true;
    715 }
    716 #else
    717 {
    718 	unsigned i;
    719 
    720 	FLAC__ASSERT(0 != br);
    721 	FLAC__ASSERT(0 != br->buffer);
    722 
    723 	*val = 0;
    724 	while(1) {
    725 		while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
    726 			brword b = br->buffer[br->consumed_words] << br->consumed_bits;
    727 			if(b) {
    728 				i = COUNT_ZERO_MSBS(b);
    729 				*val += i;
    730 				i++;
    731 				br->consumed_bits += i;
    732 				if(br->consumed_bits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(br->consumed_bits == FLAC__BITS_PER_WORD) */
    733 					crc16_update_word_(br, br->buffer[br->consumed_words]);
    734 					br->consumed_words++;
    735 					br->consumed_bits = 0;
    736 				}
    737 				return true;
    738 			}
    739 			else {
    740 				*val += FLAC__BITS_PER_WORD - br->consumed_bits;
    741 				crc16_update_word_(br, br->buffer[br->consumed_words]);
    742 				br->consumed_words++;
    743 				br->consumed_bits = 0;
    744 				/* didn't find stop bit yet, have to keep going... */
    745 			}
    746 		}
    747 		/* at this point we've eaten up all the whole words; have to try
    748 		 * reading through any tail bytes before calling the read callback.
    749 		 * this is a repeat of the above logic adjusted for the fact we
    750 		 * don't have a whole word.  note though if the client is feeding
    751 		 * us data a byte at a time (unlikely), br->consumed_bits may not
    752 		 * be zero.
    753 		 */
    754 		if(br->bytes) {
    755 			const unsigned end = br->bytes * 8;
    756 			brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
    757 			if(b) {
    758 				i = COUNT_ZERO_MSBS(b);
    759 				*val += i;
    760 				i++;
    761 				br->consumed_bits += i;
    762 				FLAC__ASSERT(br->consumed_bits < FLAC__BITS_PER_WORD);
    763 				return true;
    764 			}
    765 			else {
    766 				*val += end - br->consumed_bits;
    767 				br->consumed_bits += end;
    768 				FLAC__ASSERT(br->consumed_bits < FLAC__BITS_PER_WORD);
    769 				/* didn't find stop bit yet, have to keep going... */
    770 			}
    771 		}
    772 		if(!bitreader_read_from_client_(br))
    773 			return false;
    774 	}
    775 }
    776 #endif
    777 
    778 FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter)
    779 {
    780 	FLAC__uint32 lsbs = 0, msbs = 0;
    781 	unsigned uval;
    782 
    783 	FLAC__ASSERT(0 != br);
    784 	FLAC__ASSERT(0 != br->buffer);
    785 	FLAC__ASSERT(parameter <= 31);
    786 
    787 	/* read the unary MSBs and end bit */
    788 	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
    789 		return false;
    790 
    791 	/* read the binary LSBs */
    792 	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, parameter))
    793 		return false;
    794 
    795 	/* compose the value */
    796 	uval = (msbs << parameter) | lsbs;
    797 	if(uval & 1)
    798 		*val = -((int)(uval >> 1)) - 1;
    799 	else
    800 		*val = (int)(uval >> 1);
    801 
    802 	return true;
    803 }
    804 
    805 /* this is by far the most heavily used reader call.  it ain't pretty but it's fast */
    806 /* a lot of the logic is copied, then adapted, from FLAC__bitreader_read_unary_unsigned() and FLAC__bitreader_read_raw_uint32() */
    807 FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
    808 /* OPT: possibly faster version for use with MSVC */
    809 #ifdef _MSC_VER
    810 {
    811 	unsigned i;
    812 	unsigned uval = 0;
    813 	unsigned bits; /* the # of binary LSBs left to read to finish a rice codeword */
    814 
    815 	/* try and get br->consumed_words and br->consumed_bits into register;
    816 	 * must remember to flush them back to *br before calling other
    817 	 * bitwriter functions that use them, and before returning */
    818 	register unsigned cwords;
    819 	register unsigned cbits;
    820 
    821 	FLAC__ASSERT(0 != br);
    822 	FLAC__ASSERT(0 != br->buffer);
    823 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
    824 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
    825 	FLAC__ASSERT(parameter < 32);
    826 	/* the above two asserts also guarantee that the binary part never straddles more that 2 words, so we don't have to loop to read it */
    827 
    828 	if(nvals == 0)
    829 		return true;
    830 
    831 	cbits = br->consumed_bits;
    832 	cwords = br->consumed_words;
    833 
    834 	while(1) {
    835 
    836 		/* read unary part */
    837 		while(1) {
    838 			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
    839 				brword b = br->buffer[cwords] << cbits;
    840 				if(b) {
    841 #if 0 /* slower, probably due to bad register allocation... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32
    842 					__asm {
    843 						bsr eax, b
    844 						not eax
    845 						and eax, 31
    846 						mov i, eax
    847 					}
    848 #else
    849 					i = COUNT_ZERO_MSBS(b);
    850 #endif
    851 					uval += i;
    852 					bits = parameter;
    853 					i++;
    854 					cbits += i;
    855 					if(cbits == FLAC__BITS_PER_WORD) {
    856 						crc16_update_word_(br, br->buffer[cwords]);
    857 						cwords++;
    858 						cbits = 0;
    859 					}
    860 					goto break1;
    861 				}
    862 				else {
    863 					uval += FLAC__BITS_PER_WORD - cbits;
    864 					crc16_update_word_(br, br->buffer[cwords]);
    865 					cwords++;
    866 					cbits = 0;
    867 					/* didn't find stop bit yet, have to keep going... */
    868 				}
    869 			}
    870 			/* at this point we've eaten up all the whole words; have to try
    871 			 * reading through any tail bytes before calling the read callback.
    872 			 * this is a repeat of the above logic adjusted for the fact we
    873 			 * don't have a whole word.  note though if the client is feeding
    874 			 * us data a byte at a time (unlikely), br->consumed_bits may not
    875 			 * be zero.
    876 			 */
    877 			if(br->bytes) {
    878 				const unsigned end = br->bytes * 8;
    879 				brword b = (br->buffer[cwords] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << cbits;
    880 				if(b) {
    881 					i = COUNT_ZERO_MSBS(b);
    882 					uval += i;
    883 					bits = parameter;
    884 					i++;
    885 					cbits += i;
    886 					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
    887 					goto break1;
    888 				}
    889 				else {
    890 					uval += end - cbits;
    891 					cbits += end;
    892 					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
    893 					/* didn't find stop bit yet, have to keep going... */
    894 				}
    895 			}
    896 			/* flush registers and read; bitreader_read_from_client_() does
    897 			 * not touch br->consumed_bits at all but we still need to set
    898 			 * it in case it fails and we have to return false.
    899 			 */
    900 			br->consumed_bits = cbits;
    901 			br->consumed_words = cwords;
    902 			if(!bitreader_read_from_client_(br))
    903 				return false;
    904 			cwords = br->consumed_words;
    905 		}
    906 break1:
    907 		/* read binary part */
    908 		FLAC__ASSERT(cwords <= br->words);
    909 
    910 		if(bits) {
    911 			while((br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits < bits) {
    912 				/* flush registers and read; bitreader_read_from_client_() does
    913 				 * not touch br->consumed_bits at all but we still need to set
    914 				 * it in case it fails and we have to return false.
    915 				 */
    916 				br->consumed_bits = cbits;
    917 				br->consumed_words = cwords;
    918 				if(!bitreader_read_from_client_(br))
    919 					return false;
    920 				cwords = br->consumed_words;
    921 			}
    922 			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
    923 				if(cbits) {
    924 					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
    925 					const unsigned n = FLAC__BITS_PER_WORD - cbits;
    926 					const brword word = br->buffer[cwords];
    927 					if(bits < n) {
    928 						uval <<= bits;
    929 						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-bits);
    930 						cbits += bits;
    931 						goto break2;
    932 					}
    933 					uval <<= n;
    934 					uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
    935 					bits -= n;
    936 					crc16_update_word_(br, word);
    937 					cwords++;
    938 					cbits = 0;
    939 					if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
    940 						uval <<= bits;
    941 						uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits));
    942 						cbits = bits;
    943 					}
    944 					goto break2;
    945 				}
    946 				else {
    947 					FLAC__ASSERT(bits < FLAC__BITS_PER_WORD);
    948 					uval <<= bits;
    949 					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
    950 					cbits = bits;
    951 					goto break2;
    952 				}
    953 			}
    954 			else {
    955 				/* in this case we're starting our read at a partial tail word;
    956 				 * the reader has guaranteed that we have at least 'bits' bits
    957 				 * available to read, which makes this case simpler.
    958 				 */
    959 				uval <<= bits;
    960 				if(cbits) {
    961 					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
    962 					FLAC__ASSERT(cbits + bits <= br->bytes*8);
    963 					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-bits);
    964 					cbits += bits;
    965 					goto break2;
    966 				}
    967 				else {
    968 					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-bits);
    969 					cbits += bits;
    970 					goto break2;
    971 				}
    972 			}
    973 		}
    974 break2:
    975 		/* compose the value */
    976 		*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
    977 
    978 		/* are we done? */
    979 		--nvals;
    980 		if(nvals == 0) {
    981 			br->consumed_bits = cbits;
    982 			br->consumed_words = cwords;
    983 			return true;
    984 		}
    985 
    986 		uval = 0;
    987 		++vals;
    988 
    989 	}
    990 }
    991 #else
    992 {
    993 	unsigned i;
    994 	unsigned uval = 0;
    995 
    996 	/* try and get br->consumed_words and br->consumed_bits into register;
    997 	 * must remember to flush them back to *br before calling other
    998 	 * bitwriter functions that use them, and before returning */
    999 	register unsigned cwords;
   1000 	register unsigned cbits;
   1001 	unsigned ucbits; /* keep track of the number of unconsumed bits in the buffer */
   1002 
   1003 	FLAC__ASSERT(0 != br);
   1004 	FLAC__ASSERT(0 != br->buffer);
   1005 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
   1006 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
   1007 	FLAC__ASSERT(parameter < 32);
   1008 	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
   1009 
   1010 	if(nvals == 0)
   1011 		return true;
   1012 
   1013 	cbits = br->consumed_bits;
   1014 	cwords = br->consumed_words;
   1015 	ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
   1016 
   1017 	while(1) {
   1018 
   1019 		/* read unary part */
   1020 		while(1) {
   1021 			while(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
   1022 				brword b = br->buffer[cwords] << cbits;
   1023 				if(b) {
   1024 #if 0 /* is not discernably faster... */ && defined FLAC__CPU_IA32 && !defined FLAC__NO_ASM && FLAC__BITS_PER_WORD == 32 && defined __GNUC__
   1025 					asm volatile (
   1026 						"bsrl %1, %0;"
   1027 						"notl %0;"
   1028 						"andl $31, %0;"
   1029 						: "=r"(i)
   1030 						: "r"(b)
   1031 					);
   1032 #else
   1033 					i = COUNT_ZERO_MSBS(b);
   1034 #endif
   1035 					uval += i;
   1036 					cbits += i;
   1037 					cbits++; /* skip over stop bit */
   1038 					if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
   1039 						crc16_update_word_(br, br->buffer[cwords]);
   1040 						cwords++;
   1041 						cbits = 0;
   1042 					}
   1043 					goto break1;
   1044 				}
   1045 				else {
   1046 					uval += FLAC__BITS_PER_WORD - cbits;
   1047 					crc16_update_word_(br, br->buffer[cwords]);
   1048 					cwords++;
   1049 					cbits = 0;
   1050 					/* didn't find stop bit yet, have to keep going... */
   1051 				}
   1052 			}
   1053 			/* at this point we've eaten up all the whole words; have to try
   1054 			 * reading through any tail bytes before calling the read callback.
   1055 			 * this is a repeat of the above logic adjusted for the fact we
   1056 			 * don't have a whole word.  note though if the client is feeding
   1057 			 * us data a byte at a time (unlikely), br->consumed_bits may not
   1058 			 * be zero.
   1059 			 */
   1060 			if(br->bytes) {
   1061 				const unsigned end = br->bytes * 8;
   1062 				brword b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
   1063 				if(b) {
   1064 					i = COUNT_ZERO_MSBS(b);
   1065 					uval += i;
   1066 					cbits += i;
   1067 					cbits++; /* skip over stop bit */
   1068 					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
   1069 					goto break1;
   1070 				}
   1071 				else {
   1072 					uval += end - cbits;
   1073 					cbits += end;
   1074 					FLAC__ASSERT(cbits < FLAC__BITS_PER_WORD);
   1075 					/* didn't find stop bit yet, have to keep going... */
   1076 				}
   1077 			}
   1078 			/* flush registers and read; bitreader_read_from_client_() does
   1079 			 * not touch br->consumed_bits at all but we still need to set
   1080 			 * it in case it fails and we have to return false.
   1081 			 */
   1082 			br->consumed_bits = cbits;
   1083 			br->consumed_words = cwords;
   1084 			if(!bitreader_read_from_client_(br))
   1085 				return false;
   1086 			cwords = br->consumed_words;
   1087 			ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval;
   1088 			/* + uval to offset our count by the # of unary bits already
   1089 			 * consumed before the read, because we will add these back
   1090 			 * in all at once at break1
   1091 			 */
   1092 		}
   1093 break1:
   1094 		ucbits -= uval;
   1095 		ucbits--; /* account for stop bit */
   1096 
   1097 		/* read binary part */
   1098 		FLAC__ASSERT(cwords <= br->words);
   1099 
   1100 		if(parameter) {
   1101 			while(ucbits < parameter) {
   1102 				/* flush registers and read; bitreader_read_from_client_() does
   1103 				 * not touch br->consumed_bits at all but we still need to set
   1104 				 * it in case it fails and we have to return false.
   1105 				 */
   1106 				br->consumed_bits = cbits;
   1107 				br->consumed_words = cwords;
   1108 				if(!bitreader_read_from_client_(br))
   1109 					return false;
   1110 				cwords = br->consumed_words;
   1111 				ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
   1112 			}
   1113 			if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
   1114 				if(cbits) {
   1115 					/* this also works when consumed_bits==0, it's just slower than necessary for that case */
   1116 					const unsigned n = FLAC__BITS_PER_WORD - cbits;
   1117 					const brword word = br->buffer[cwords];
   1118 					if(parameter < n) {
   1119 						uval <<= parameter;
   1120 						uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
   1121 						cbits += parameter;
   1122 					}
   1123 					else {
   1124 						uval <<= n;
   1125 						uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
   1126 						crc16_update_word_(br, word);
   1127 						cwords++;
   1128 						cbits = parameter - n;
   1129 						if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
   1130 							uval <<= cbits;
   1131 							uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
   1132 						}
   1133 					}
   1134 				}
   1135 				else {
   1136 					cbits = parameter;
   1137 					uval <<= parameter;
   1138 					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
   1139 				}
   1140 			}
   1141 			else {
   1142 				/* in this case we're starting our read at a partial tail word;
   1143 				 * the reader has guaranteed that we have at least 'parameter'
   1144 				 * bits available to read, which makes this case simpler.
   1145 				 */
   1146 				uval <<= parameter;
   1147 				if(cbits) {
   1148 					/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
   1149 					FLAC__ASSERT(cbits + parameter <= br->bytes*8);
   1150 					uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
   1151 					cbits += parameter;
   1152 				}
   1153 				else {
   1154 					cbits = parameter;
   1155 					uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
   1156 				}
   1157 			}
   1158 		}
   1159 
   1160 		ucbits -= parameter;
   1161 
   1162 		/* compose the value */
   1163 		*vals = (int)(uval >> 1 ^ -(int)(uval & 1));
   1164 
   1165 		/* are we done? */
   1166 		--nvals;
   1167 		if(nvals == 0) {
   1168 			br->consumed_bits = cbits;
   1169 			br->consumed_words = cwords;
   1170 			return true;
   1171 		}
   1172 
   1173 		uval = 0;
   1174 		++vals;
   1175 
   1176 	}
   1177 }
   1178 #endif
   1179 
   1180 #if 0 /* UNUSED */
   1181 FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter)
   1182 {
   1183 	FLAC__uint32 lsbs = 0, msbs = 0;
   1184 	unsigned bit, uval, k;
   1185 
   1186 	FLAC__ASSERT(0 != br);
   1187 	FLAC__ASSERT(0 != br->buffer);
   1188 
   1189 	k = FLAC__bitmath_ilog2(parameter);
   1190 
   1191 	/* read the unary MSBs and end bit */
   1192 	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
   1193 		return false;
   1194 
   1195 	/* read the binary LSBs */
   1196 	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, k))
   1197 		return false;
   1198 
   1199 	if(parameter == 1u<<k) {
   1200 		/* compose the value */
   1201 		uval = (msbs << k) | lsbs;
   1202 	}
   1203 	else {
   1204 		unsigned d = (1 << (k+1)) - parameter;
   1205 		if(lsbs >= d) {
   1206 			if(!FLAC__bitreader_read_bit(br, &bit))
   1207 				return false;
   1208 			lsbs <<= 1;
   1209 			lsbs |= bit;
   1210 			lsbs -= d;
   1211 		}
   1212 		/* compose the value */
   1213 		uval = msbs * parameter + lsbs;
   1214 	}
   1215 
   1216 	/* unfold unsigned to signed */
   1217 	if(uval & 1)
   1218 		*val = -((int)(uval >> 1)) - 1;
   1219 	else
   1220 		*val = (int)(uval >> 1);
   1221 
   1222 	return true;
   1223 }
   1224 
   1225 FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter)
   1226 {
   1227 	FLAC__uint32 lsbs, msbs = 0;
   1228 	unsigned bit, k;
   1229 
   1230 	FLAC__ASSERT(0 != br);
   1231 	FLAC__ASSERT(0 != br->buffer);
   1232 
   1233 	k = FLAC__bitmath_ilog2(parameter);
   1234 
   1235 	/* read the unary MSBs and end bit */
   1236 	if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
   1237 		return false;
   1238 
   1239 	/* read the binary LSBs */
   1240 	if(!FLAC__bitreader_read_raw_uint32(br, &lsbs, k))
   1241 		return false;
   1242 
   1243 	if(parameter == 1u<<k) {
   1244 		/* compose the value */
   1245 		*val = (msbs << k) | lsbs;
   1246 	}
   1247 	else {
   1248 		unsigned d = (1 << (k+1)) - parameter;
   1249 		if(lsbs >= d) {
   1250 			if(!FLAC__bitreader_read_bit(br, &bit))
   1251 				return false;
   1252 			lsbs <<= 1;
   1253 			lsbs |= bit;
   1254 			lsbs -= d;
   1255 		}
   1256 		/* compose the value */
   1257 		*val = msbs * parameter + lsbs;
   1258 	}
   1259 
   1260 	return true;
   1261 }
   1262 #endif /* UNUSED */
   1263 
   1264 /* on return, if *val == 0xffffffff then the utf-8 sequence was invalid, but the return value will be true */
   1265 FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen)
   1266 {
   1267 	FLAC__uint32 v = 0;
   1268 	FLAC__uint32 x;
   1269 	unsigned i;
   1270 
   1271 	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
   1272 		return false;
   1273 	if(raw)
   1274 		raw[(*rawlen)++] = (FLAC__byte)x;
   1275 	if(!(x & 0x80)) { /* 0xxxxxxx */
   1276 		v = x;
   1277 		i = 0;
   1278 	}
   1279 	else if(x & 0xC0 && !(x & 0x20)) { /* 110xxxxx */
   1280 		v = x & 0x1F;
   1281 		i = 1;
   1282 	}
   1283 	else if(x & 0xE0 && !(x & 0x10)) { /* 1110xxxx */
   1284 		v = x & 0x0F;
   1285 		i = 2;
   1286 	}
   1287 	else if(x & 0xF0 && !(x & 0x08)) { /* 11110xxx */
   1288 		v = x & 0x07;
   1289 		i = 3;
   1290 	}
   1291 	else if(x & 0xF8 && !(x & 0x04)) { /* 111110xx */
   1292 		v = x & 0x03;
   1293 		i = 4;
   1294 	}
   1295 	else if(x & 0xFC && !(x & 0x02)) { /* 1111110x */
   1296 		v = x & 0x01;
   1297 		i = 5;
   1298 	}
   1299 	else {
   1300 		*val = 0xffffffff;
   1301 		return true;
   1302 	}
   1303 	for( ; i; i--) {
   1304 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
   1305 			return false;
   1306 		if(raw)
   1307 			raw[(*rawlen)++] = (FLAC__byte)x;
   1308 		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
   1309 			*val = 0xffffffff;
   1310 			return true;
   1311 		}
   1312 		v <<= 6;
   1313 		v |= (x & 0x3F);
   1314 	}
   1315 	*val = v;
   1316 	return true;
   1317 }
   1318 
   1319 /* on return, if *val == 0xffffffffffffffff then the utf-8 sequence was invalid, but the return value will be true */
   1320 FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen)
   1321 {
   1322 	FLAC__uint64 v = 0;
   1323 	FLAC__uint32 x;
   1324 	unsigned i;
   1325 
   1326 	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
   1327 		return false;
   1328 	if(raw)
   1329 		raw[(*rawlen)++] = (FLAC__byte)x;
   1330 	if(!(x & 0x80)) { /* 0xxxxxxx */
   1331 		v = x;
   1332 		i = 0;
   1333 	}
   1334 	else if(x & 0xC0 && !(x & 0x20)) { /* 110xxxxx */
   1335 		v = x & 0x1F;
   1336 		i = 1;
   1337 	}
   1338 	else if(x & 0xE0 && !(x & 0x10)) { /* 1110xxxx */
   1339 		v = x & 0x0F;
   1340 		i = 2;
   1341 	}
   1342 	else if(x & 0xF0 && !(x & 0x08)) { /* 11110xxx */
   1343 		v = x & 0x07;
   1344 		i = 3;
   1345 	}
   1346 	else if(x & 0xF8 && !(x & 0x04)) { /* 111110xx */
   1347 		v = x & 0x03;
   1348 		i = 4;
   1349 	}
   1350 	else if(x & 0xFC && !(x & 0x02)) { /* 1111110x */
   1351 		v = x & 0x01;
   1352 		i = 5;
   1353 	}
   1354 	else if(x & 0xFE && !(x & 0x01)) { /* 11111110 */
   1355 		v = 0;
   1356 		i = 6;
   1357 	}
   1358 	else {
   1359 		*val = FLAC__U64L(0xffffffffffffffff);
   1360 		return true;
   1361 	}
   1362 	for( ; i; i--) {
   1363 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
   1364 			return false;
   1365 		if(raw)
   1366 			raw[(*rawlen)++] = (FLAC__byte)x;
   1367 		if(!(x & 0x80) || (x & 0x40)) { /* 10xxxxxx */
   1368 			*val = FLAC__U64L(0xffffffffffffffff);
   1369 			return true;
   1370 		}
   1371 		v <<= 6;
   1372 		v |= (x & 0x3F);
   1373 	}
   1374 	*val = v;
   1375 	return true;
   1376 }
   1377