; vim:filetype=nasm ts=8

; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001,2002,2003,2004,2005,2006,2007 Josh Coalson
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%include "nasm.h"

	data_section

cextern FLAC__crc16_table		; unsigned FLAC__crc16_table[256];
cextern bitreader_read_from_client_	; FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br);

cglobal FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

	code_section


; **********************************************************************
;
; FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
;
; Reads 'nvals' Rice-coded signed values (unary quotient, stop bit, then
; 'parameter' binary bits, zigzag-folded sign) from the bit reader and
; stores them to vals[0..nvals-1].
;
; Syntax:  NASM / Intel operand order, 32-bit x86.
; In:      all four arguments on the stack (see the [esp + N] keys below).
; Out:     eax = 1 on success (including nvals == 0);
;          eax = 0 as soon as bitreader_read_from_client_() returns 0.
; Saved:   ebp, ebx, esi, edi are pushed on entry and popped on exit.
; Clobb:   eax, ecx, edx, flags.
; Stack:   one 4-byte local at [esp] ('ucbits', the count of unconsumed bits
;          currently buffered) on top of the four saved registers.
; State:   br->consumed_words / br->consumed_bits are written back before
;          every refill call and on successful completion; br->read_crc is
;          updated each time a whole word has been consumed.
;
; Some details like assertions and other checking is performed by the caller.
	ALIGN 16
cident FLAC__bitreader_read_rice_signed_block_asm_ia32_bswap

	;ASSERT(0 != br);
	;ASSERT(0 != br->buffer);
	; WATCHOUT: code only works if sizeof(brword)==32; we can make things much faster with this assertion
	;ASSERT(FLAC__BITS_PER_WORD == 32);
	;ASSERT(parameter < 32);
	; the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it

	;; peppered throughout the code at major checkpoints are keys like this as to where things are at that point in time
	;; [esp + 16]	unsigned parameter
	;; [esp + 12]	unsigned nvals
	;; [esp + 8]	int vals[]
	;; [esp + 4]	FLAC__BitReader *br
	mov	eax, [esp + 12]		; if(nvals == 0)
	test	eax, eax
	jnz	.nvals_gt_0		;   (test clears CF, so jnz is the plain "non-zero" check)
	mov	eax, 1			;   return true;
	ret

.nvals_gt_0:
	push	ebp
	push	ebx
	push	esi
	push	edi
	sub	esp, 4			; room for the 'ucbits' local
	;; [esp + 36]	unsigned parameter
	;; [esp + 32]	unsigned nvals
	;; [esp + 28]	int vals[]
	;; [esp + 24]	FLAC__BitReader *br
	;; [esp]	ucbits
	mov	ebp, [esp + 24]		; ebp <- br == br->buffer
	mov	esi, [ebp + 16]		; esi <- br->consumed_words (aka 'cwords' in the C version)
	mov	ecx, [ebp + 20]		; ecx <- br->consumed_bits  (aka 'cbits'  in the C version)
	xor	edi, edi		; edi <- 0  'uval'
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	;; [ebp]	br->buffer
	;; [ebp + 8]	br->words
	;; [ebp + 12]	br->bytes
	;; [ebp + 16]	br->consumed_words
	;; [ebp + 20]	br->consumed_bits
	;; [ebp + 24]	br->read_crc
	;; [ebp + 28]	br->crc16_align

					; ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
	mov	eax, [ebp + 8]		;   eax <- br->words
	sub	eax, esi		;   eax <- br->words-cwords
	shl	eax, 2			;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD
	add	eax, [ebp + 12]		;   eax <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
	shl	eax, 3			;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
	sub	eax, ecx		;   eax <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
	mov	[esp], eax		;   ucbits <- eax

	ALIGN 16
.val_loop:				; while(1) {

	;
	; read unary part
	;
.unary_loop:				;   while(1) {
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	cmp	esi, [ebp + 8]		;     while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
	jae	near .c1_next1
.c1_loop:				;     {
	mov	ebx, [ebp]
	mov	eax, [ebx + 4*esi]	;       b = br->buffer[cwords]
	mov	edx, eax		;       edx = br->buffer[cwords] (saved for later use)
	shl	eax, cl			;       b = br->buffer[cwords] << cbits
	test	eax, eax		;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
	jz	near .c1_next2		;       if(b) {
	bsr	ebx, eax
	not	ebx
	and	ebx, 31			;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
	add	ecx, ebx		;         cbits += i;
	add	edi, ebx		;         uval += i;
	add	ecx, byte 1		;         cbits++; /* skip over stop bit */
	test	ecx, ~31
	jz	near .break1		;         if(cbits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(cbits == FLAC__BITS_PER_WORD) */
					;           [the jz skips this block while cbits is still < 32]
					;           crc16_update_word_(br, br->buffer[cwords]);
	push	edi			;		[need more registers]
	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
	mov	edi, _FLAC__crc16_table
%else
	mov	edi, FLAC__crc16_table
%endif
	;; eax (ax)	crc a.k.a. br->read_crc
	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
	;; ecx		br->crc16_align
	;; edx		byteswapped brword to CRC
	;; esi		cwords
	;; edi		unsigned FLAC__crc16_table[]
	;; ebp		br
	test	ecx, ecx		;		switch(br->crc16_align) ...
	jnz	.c0b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c0b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c0b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shr	edx, 16
.c0b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c0b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
	movzx	eax, ax			;		keep only the low 16 bits of the running value
	mov	[ebp + 24], eax		;		br->read_crc <- crc
	pop	edi

	add	esi, byte 1		;           cwords++;
	xor	ecx, ecx		;           cbits = 0;
					;         }
	jmp	near .break1		;         goto break1;
	;; this section relocated out of the way for performance
.c0b4:
	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
	cmp	ecx, 8
	je	.c0b1
	shr	edx, 16
	cmp	ecx, 16
	je	.c0b2
	jmp	.c0b3

	;; this section relocated out of the way for performance
.c1b4:
	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
	cmp	ecx, 8
	je	.c1b1
	shr	edx, 16
	cmp	ecx, 16
	je	.c1b2
	jmp	.c1b3

.c1_next2:				;       } else {
	;; ecx		cbits
	;; edx		current brword 'b'
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	add	edi, 32
	sub	edi, ecx		;         uval += FLAC__BITS_PER_WORD - cbits;
					;         crc16_update_word_(br, br->buffer[cwords]);
	push	edi			;		[need more registers]
	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
	mov	edi, _FLAC__crc16_table
%else
	mov	edi, FLAC__crc16_table
%endif
	;; eax (ax)	crc a.k.a. br->read_crc
	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
	;; ecx		br->crc16_align
	;; edx		byteswapped brword to CRC
	;; esi		cwords
	;; edi		unsigned FLAC__crc16_table[]
	;; ebp		br
	test	ecx, ecx		;		switch(br->crc16_align) ...
	jnz	.c1b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c1b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c1b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shr	edx, 16
.c1b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c1b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
	movzx	eax, ax			;		keep only the low 16 bits of the running value
	mov	[ebp + 24], eax		;		br->read_crc <- crc
	pop	edi

	add	esi, byte 1		;         cwords++;
	xor	ecx, ecx		;         cbits = 0;
					;         /* didn't find stop bit yet, have to keep going... */
					;       }

	cmp	esi, [ebp + 8]		;     } while(cwords < br->words)   /* if we've not consumed up to a partial tail word... */
	jb	near .c1_loop

.c1_next1:
	; at this point we've eaten up all the whole words; have to try
	; reading through any tail bytes before calling the read callback.
	; this is a repeat of the above logic adjusted for the fact we
	; don't have a whole word.  note though if the client is feeding
	; us data a byte at a time (unlikely), br->consumed_bits may not
	; be zero.
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	mov	edx, [ebp + 12]		; edx <- br->bytes
	test	edx, edx
	jz	.read1			;     if(br->bytes) {  [NOTE: this case is rare so it doesn't have to be all that fast ]
	mov	ebx, [ebp]
	shl	edx, 3			;       edx <- const unsigned end = br->bytes * 8;
	mov	eax, [ebx + 4*esi]	;       b = br->buffer[cwords]
	xchg	edx, ecx		;       [edx <- cbits , ecx <- end]
	mov	ebx, 0xffffffff		;       ebx <- FLAC__WORD_ALL_ONES
	shr	ebx, cl			;       ebx <- FLAC__WORD_ALL_ONES >> end
	not	ebx			;       ebx <- ~(FLAC__WORD_ALL_ONES >> end)
	xchg	edx, ecx		;       [edx <- end , ecx <- cbits]
	and	eax, ebx		;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end));
	shl	eax, cl			;       b = (br->buffer[cwords] & ~(FLAC__WORD_ALL_ONES >> end)) << cbits;
	test	eax, eax		;         (still have to test since cbits may be 0, thus ZF not updated for shl eax,0)
	jz	.c1_next3		;       if(b) {
	bsr	ebx, eax
	not	ebx
	and	ebx, 31			;         ebx = 'i' = # of leading 0 bits in 'b' (eax)
	add	ecx, ebx		;         cbits += i;
	add	edi, ebx		;         uval += i;
	add	ecx, byte 1		;         cbits++; /* skip over stop bit */
	jmp	short .break1		;         goto break1;
.c1_next3:				;       } else {
	sub	edi, ecx
	add	edi, edx		;         uval += end - cbits;
	mov	ecx, edx		;         cbits = end;
					;         [every valid bit of the tail word is now consumed, so the
					;          position is exactly 'end'; adding 'end' to a non-zero
					;          cbits would overshoot and corrupt the later shift counts
					;          and the ucbits computation below]
					;         /* didn't find stop bit yet, have to keep going... */
					;       }
					;     }
.read1:
	; flush registers and read; bitreader_read_from_client_() does
	; not touch br->consumed_bits at all but we still need to set
	; it in case it fails and we have to return false.
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	mov	[ebp + 16], esi		;     br->consumed_words = cwords;
	mov	[ebp + 20], ecx		;     br->consumed_bits = cbits;
	push	ecx			;     /* save */
	push	ebp			;     /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
	call	_bitreader_read_from_client_
%else
	call	bitreader_read_from_client_
%endif
	pop	edx			;     /* discard, unused */
	pop	ecx			;     /* restore */
	mov	esi, [ebp + 16]		;     cwords = br->consumed_words;
					;     ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
	mov	ebx, [ebp + 8]		;       ebx <- br->words
	sub	ebx, esi		;       ebx <- br->words-cwords
	shl	ebx, 2			;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
	add	ebx, [ebp + 12]		;       ebx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
	shl	ebx, 3			;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
	sub	ebx, ecx		;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
	add	ebx, edi		;       ebx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits + uval
					;           + uval to offset our count by the # of unary bits already
					;           consumed before the read, because we will add these back
					;           in all at once at break1
	mov	[esp], ebx		;       ucbits <- ebx
	test	eax, eax		;     if(!bitreader_read_from_client_(br))
	jnz	near .unary_loop
	jmp	.end			;       return false; /* eax (the return value) is already 0 */
					;   } /* end while(1) unary part */

	ALIGN 16
.break1:
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	;; [esp]	ucbits
	sub	[esp], edi		;   ucbits -= uval;
	sub	dword [esp], byte 1	;   ucbits--; /* account for stop bit */

	;
	; read binary part
	;
	mov	ebx, [esp + 36]		;   ebx <- parameter
	test	ebx, ebx		;   if(parameter) {
	jz	near .break2
.read2:
	cmp	[esp], ebx		;     while(ucbits < parameter) {
	jae	.c2_next1
	; flush registers and read; bitreader_read_from_client_() does
	; not touch br->consumed_bits at all but we still need to set
	; it in case it fails and we have to return false.
	mov	[ebp + 16], esi		;       br->consumed_words = cwords;
	mov	[ebp + 20], ecx		;       br->consumed_bits = cbits;
	push	ecx			;       /* save */
	push	ebp			;       /* push br argument */
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
	call	_bitreader_read_from_client_
%else
	call	bitreader_read_from_client_
%endif
	pop	edx			;       /* discard, unused */
	pop	ecx			;       /* restore */
	mov	esi, [ebp + 16]		;       cwords = br->consumed_words;
					;       ucbits = (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits;
	mov	edx, [ebp + 8]		;         edx <- br->words
	sub	edx, esi		;         edx <- br->words-cwords
	shl	edx, 2			;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD
	add	edx, [ebp + 12]		;         edx <- (br->words-cwords)*FLAC__BYTES_PER_WORD + br->bytes
	shl	edx, 3			;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8
	sub	edx, ecx		;         edx <- (br->words-cwords)*FLAC__BITS_PER_WORD + br->bytes*8 - cbits
	mov	[esp], edx		;         ucbits <- edx
	test	eax, eax		;       if(!bitreader_read_from_client_(br))
	jnz	.read2
	jmp	.end			;         return false; /* eax (the return value) is already 0 */
					;     }
.c2_next1:
	;; ebx		parameter
	;; ecx		cbits
	;; esi		cwords
	;; edi		uval
	;; ebp		br
	;; [esp]	ucbits
	cmp	esi, [ebp + 8]		;     if(cwords < br->words) { /* if we've not consumed up to a partial tail word... */
	jae	near .c2_next2
	test	ecx, ecx		;       if(cbits) {
	jz	near .c2_next3		;         /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
	mov	eax, 32
	mov	edx, [ebp]
	sub	eax, ecx		;         const unsigned n = FLAC__BITS_PER_WORD - cbits;
	mov	edx, [edx + 4*esi]	;         const brword word = br->buffer[cwords];
	cmp	ebx, eax		;         if(parameter < n) {
	jae	.c2_next4
					;           uval <<= parameter;
					;           uval |= (word & (FLAC__WORD_ALL_ONES >> cbits)) >> (n-parameter);
	shl	edx, cl
	xchg	ebx, ecx		;           [ebx <- cbits, ecx <- parameter]
	shld	edi, edx, cl
	add	ebx, ecx		;           cbits += parameter;
	xchg	ebx, ecx		;           ebx <- parameter, ecx <- cbits
	jmp	.break2			;           goto break2;
					;         }
.c2_next4:
					;         uval <<= n;
					;         uval |= word & (FLAC__WORD_ALL_ONES >> cbits);
%if 1
	rol	edx, cl			;            @@@@@@OPT: may be faster to use rol to save edx so we can restore it for CRC'ing
					;            @@@@@@OPT: or put parameter in ch instead and free up ebx completely again
%else
	shl	edx, cl
%endif
	xchg	eax, ecx		;         [eax <- cbits, ecx <- n]
	shld	edi, edx, cl
	xchg	eax, ecx		;         [eax <- n, ecx <- cbits]
%if 1
	ror	edx, cl			;            restored.
%else
	mov	edx, [ebp]
	mov	edx, [edx + 4*esi]
%endif
					;         crc16_update_word_(br, br->buffer[cwords]);
	push	edi			;		[need more registers]
	push	ebx			;		[need more registers]
	push	eax			;		[need more registers]
	bswap	edx			;		edx = br->buffer[cwords] swapped; now we can CRC the bytes from LSByte to MSByte which makes things much easier
	mov	ecx, [ebp + 28]		;		ecx <- br->crc16_align
	mov	eax, [ebp + 24]		;		ax <- br->read_crc (a.k.a. crc)
%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
	mov	edi, _FLAC__crc16_table
%else
	mov	edi, FLAC__crc16_table
%endif
	;; eax (ax)	crc a.k.a. br->read_crc
	;; ebx (bl)	intermediate result index into FLAC__crc16_table[]
	;; ecx		br->crc16_align
	;; edx		byteswapped brword to CRC
	;; esi		cwords
	;; edi		unsigned FLAC__crc16_table[]
	;; ebp		br
	test	ecx, ecx		;		switch(br->crc16_align) ...
	jnz	.c2b4			;		[br->crc16_align is 0 the vast majority of the time so we optimize the common case]
.c2b0:	xor	dl, ah			;		dl <- (crc>>8)^(word>>24)
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word>>24)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word>>24)]
.c2b1:	xor	dh, ah			;		dh <- (crc>>8)^((word>>16)&0xff))
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>16)&0xff))]
	shr	edx, 16
.c2b2:	xor	dl, ah			;		dl <- (crc>>8)^((word>>8)&0xff))
	movzx	ebx, dl
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^((word>>8)&0xff))]
.c2b3:	xor	dh, ah			;		dh <- (crc>>8)^(word&0xff)
	movzx	ebx, dh
	mov	ecx, [ebx*4 + edi]	;		cx <- FLAC__crc16_table[(crc>>8)^(word&0xff)]
	shl	eax, 8			;		ax <- (crc<<8)
	xor	eax, ecx		;		crc <- ax <- (crc<<8) ^ FLAC__crc16_table[(crc>>8)^(word&0xff)]
	movzx	eax, ax			;		keep only the low 16 bits of the running value
	mov	[ebp + 24], eax		;		br->read_crc <- crc
	pop	eax			;		[eax <- n]
	pop	ebx			;		[ebx <- parameter]
	pop	edi			;		[edi <- uval]
	add	esi, byte 1		;         cwords++;
	mov	ecx, ebx
	sub	ecx, eax		;         cbits = parameter - n;
	jz	.break2			;         if(cbits) { /* parameter > n, i.e. if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
					;           uval <<= cbits;
					;           uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
	mov	eax, [ebp]
	mov	eax, [eax + 4*esi]
	shld	edi, eax, cl
					;         }
	jmp	.break2			;         goto break2;

	;; this section relocated out of the way for performance
.c2b4:
	mov	[ebp + 28], dword 0	;		br->crc16_align <- 0
	cmp	ecx, 8
	je	.c2b1
	shr	edx, 16
	cmp	ecx, 16
	je	.c2b2
	jmp	.c2b3

.c2_next3:				;       } else {
	mov	ecx, ebx		;         cbits = parameter;
					;         uval <<= cbits;
					;         uval |= (br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits));
	mov	eax, [ebp]
	mov	eax, [eax + 4*esi]
	shld	edi, eax, cl
	jmp	.break2			;         goto break2;
					;       }
.c2_next2:				;     } else {
	; in this case we're starting our read at a partial tail word;
	; the reader has guaranteed that we have at least 'parameter'
	; bits available to read, which makes this case simpler.
	; uval <<= parameter;
	; if(cbits) {
	;   /* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
	;   uval |= (br->buffer[cwords] & (FLAC__WORD_ALL_ONES >> cbits)) >> (FLAC__BITS_PER_WORD-cbits-parameter);
	;   cbits += parameter;
	;   goto break2;
	; } else {
	;   cbits = parameter;
	;   uval |= br->buffer[cwords] >> (FLAC__BITS_PER_WORD-cbits);
	;   goto break2;
	; }
	; the above is much shorter in assembly:
	mov	eax, [ebp]
	mov	eax, [eax + 4*esi]	;       eax <- br->buffer[cwords]
	shl	eax, cl			;       eax <- br->buffer[cwords] << cbits
	add	ecx, ebx		;       cbits += parameter
	xchg	ebx, ecx		;       ebx <- cbits, ecx <- parameter
	shld	edi, eax, cl		;       uval <<= parameter <<< 'parameter' bits of tail word
	xchg	ebx, ecx		;       ebx <- parameter, ecx <- cbits
					;     }
					;   }
.break2:
	sub	[esp], ebx		;   ucbits -= parameter;

	;
	; compose the value
	;
	mov	ebx, [esp + 28]		;   ebx <- vals
	mov	edx, edi		;   edx <- uval
	and	edi, 1			;   edi <- uval & 1
	shr	edx, 1			;   edx <- uval >> 1
	neg	edi			;   edi <- -(int)(uval & 1)
	xor	edx, edi		;   edx <- (uval >> 1 ^ -(int)(uval & 1))
	mov	[ebx], edx		;   *vals <- edx
	sub	dword [esp + 32], byte 1	;   --nvals;
	jz	.finished		;   if(nvals == 0) /* jump to finish */
	xor	edi, edi		;   uval = 0;
	add	dword [esp + 28], 4	;   ++vals
	jmp	.val_loop		; }

.finished:
	mov	[ebp + 16], esi		; br->consumed_words = cwords;
	mov	[ebp + 20], ecx		; br->consumed_bits = cbits;
	mov	eax, 1			; return true
.end:
	add	esp, 4			; drop the 'ucbits' local
	pop	edi
	pop	esi
	pop	ebx
	pop	ebp
	ret

end

%ifdef OBJ_FORMAT_elf
	section .note.GNU-stack noalloc
%endif