/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "arm_asm.h"

/* Endianness abstraction: S2LOMEM shifts a register value towards the
   low-memory end of the word, S2HIMEM towards the high-memory end;
   MSB/LSB are masks for the byte at the highest/lowest memory address;
   BYTEn_OFFSET is the bit offset of the byte at memory offset n.  */
#ifdef __ARMEB__
#define S2LOMEM lsl
#define S2LOMEMEQ lsleq
#define S2HIMEM lsr
#define MSB 0x000000ff
#define LSB 0xff000000
#define BYTE0_OFFSET 24
#define BYTE1_OFFSET 16
#define BYTE2_OFFSET 8
#define BYTE3_OFFSET 0
#else /* not __ARMEB__ */
#define S2LOMEM lsr
#define S2LOMEMEQ lsreq
#define S2HIMEM lsl
#define BYTE0_OFFSET 0
#define BYTE1_OFFSET 8
#define BYTE2_OFFSET 16
#define BYTE3_OFFSET 24
#define MSB 0xff000000
#define LSB 0x000000ff
#endif /* not __ARMEB__ */

	.syntax unified

#if defined (__thumb__)
	.thumb
	.thumb_func
#endif
	.global strcmp
	.type strcmp, %function

/* int strcmp (const char *s1, const char *s2)
   In:    r0 = s1, r1 = s2 (AAPCS).
   Out:   r0 = difference of the first pair of mismatching bytes
          (negative, zero, or positive); computed as an unsigned byte
          subtraction in every variant below.
   Note:  the Thumb-2/ARMv6 variant spills r4-r7 to the stack (see the
          `init' macro) and restores them on every return path; the
          other variants use at most r4/r5, saved where needed.  */
strcmp:

#if (defined (__thumb__) && !defined (__thumb2__))
/* Thumb-1 (no IT blocks, no post-indexed addressing): plain byte loop.  */
1:
	ldrb r2, [r0]
	ldrb r3, [r1]
	adds r0, r0, #1
	adds r1, r1, #1
	cmp r2, #0                   /* Stop at the terminator of s1...  */
	beq 2f
	cmp r2, r3                   /* ...or at the first mismatch.  */
	beq 1b
2:
	subs r0, r2, r3
	bx lr
#elif (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED))
/* Size-optimized byte loop.  `cmp r2, #1' sets C iff r2 != 0, so the
   conditional `cmpcs' folds the NUL test and the equality test.  */
1:
	ldrb r2, [r0], #1
	ldrb r3, [r1], #1
	cmp r2, #1
	it cs
	cmpcs r2, r3
	beq 1b
	subs r0, r2, r3
	RETURN


#elif (defined (_ISA_THUMB_2) || defined (_ISA_ARM_6))
	/* Use LDRD whenever possible.  */

/* The main thing to look out for when comparing large blocks is that
   the loads do not cross a page boundary when loading past the index
   of the byte with the first difference or the first string-terminator.

   For example, if the strings are identical and the string-terminator
   is at index k, byte by byte comparison will not load beyond address
   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
   k; double word - up to 7 bytes.  If the load of these bytes crosses
   a page boundary, it might cause a memory fault (if the page is not mapped)
   that would not have happened in byte by byte comparison.

   If an address is (double) word aligned, then a load of a (double) word
   from that address will not cross a page boundary.
   Therefore, the algorithm below considers word and double-word alignment
   of strings separately.  */

/* High-level description of the algorithm.

   * The fast path: if both strings are double-word aligned,
     use LDRD to load two words from each string in every loop iteration.
   * If the strings have the same offset from a word boundary,
     use LDRB to load and compare byte by byte until
     the first string is aligned to a word boundary (at most 3 bytes).
     This is optimized for quick return on short unaligned strings.
   * If the strings have the same offset from a double-word boundary,
     use LDRD to load two words from each string in every loop iteration, as in the fast path.
   * If the strings do not have the same offset from a double-word boundary,
     load a word from the second string before the loop to initialize the queue.
     Use LDRD to load two words from every string in every loop iteration.
     Inside the loop, load the second word from the second string only after comparing
     the first word, using the queued value, to guarantee safety across page boundaries.
   * If the strings do not have the same offset from a word boundary,
     use LDR and a shift queue.  Order of loads and comparisons matters,
     similarly to the previous case.

   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
   * The only difference between big and little endian is the use of REV in little endian
     to compute the return value, instead of MOV.
   * No preload.  [TODO.]
*/

	/* Compare-and-branch-if-zero; CBZ exists only in Thumb-2.  */
	.macro m_cbz reg label
#ifdef __thumb2__
	cbz \reg, \label
#else /* not defined __thumb2__ */
	cmp \reg, #0
	beq \label
#endif /* not defined __thumb2__ */
	.endm /* m_cbz */

	/* Compare-and-branch-if-non-zero; CBNZ exists only in Thumb-2.  */
	.macro m_cbnz reg label
#ifdef __thumb2__
	cbnz \reg, \label
#else /* not defined __thumb2__ */
	cmp \reg, #0
	bne \label
#endif /* not defined __thumb2__ */
	.endm /* m_cbnz */

	.macro init
	/* Macro to save temporary registers and prepare magic values.  */
	subs sp, sp, #16
	strd r4, r5, [sp, #8]
	strd r6, r7, [sp]
	mvn r6, #0 /* all F */
	mov r7, #0 /* all 0 */
	.endm /* init */

	.macro magic_compare_and_branch w1 w2 label
	/* Macro to compare registers w1 and w2 and conditionally branch to label.
	   Branches when w1 != w2 or when w1 contains a zero byte; in either
	   case ip holds the zero-byte syndrome of w1 for the return code.  */
	cmp \w1, \w2 /* Are w1 and w2 the same?  */
	magic_find_zero_bytes \w1
	it eq
	cmpeq ip, #0 /* Is there a zero byte in w1?  */
	bne \label
	.endm /* magic_compare_and_branch */

	.macro magic_find_zero_bytes w1
	/* Macro to find all-zero bytes in w1, result is in ip.  */
#if (defined (__ARM_FEATURE_DSP))
	uadd8 ip, \w1, r6
	sel ip, r7, r6
#else /* not defined (__ARM_FEATURE_DSP) */
	/* __ARM_FEATURE_DSP is not defined for some Cortex-M processors.
	   Coincidentally, these processors only have Thumb-2 mode, where we can use
	   the (large) magic constant available directly as an immediate in instructions.
	   Note that we cannot use the magic constant in ARM mode, where we need
	   to create the constant in a register.  */
	sub ip, \w1, #0x01010101
	bic ip, ip, \w1
	and ip, ip, #0x80808080
#endif /* not defined (__ARM_FEATURE_DSP) */
	.endm /* magic_find_zero_bytes */

	/* Move the two mismatching words into r1/r2 in memory order
	   (REV on little endian) so CLZ can find the first differing byte.  */
	.macro setup_return w1 w2
#ifdef __ARMEB__
	mov r1, \w1
	mov r2, \w2
#else /* not __ARMEB__ */
	rev r1, \w1
	rev r2, \w2
#endif /* not __ARMEB__ */
	.endm /* setup_return */

/*
	optpld r0, #0
	optpld r1, #0
*/

	/* Are both strings double-word aligned?  */
	orr ip, r0, r1
	tst ip, #7
	bne do_align

	/* Fast path.  */
	init

doubleword_aligned:

	/* Get here when the strings to compare are double-word aligned.  */
	/* Compare two words in every iteration.  */
	.p2align 2
2:
/*
	optpld r0, #16
	optpld r1, #16
*/

	/* Load the next double-word from each string.  */
	ldrd r2, r3, [r0], #8
	ldrd r4, r5, [r1], #8

	magic_compare_and_branch w1=r2, w2=r4, label=return_24
	magic_compare_and_branch w1=r3, w2=r5, label=return_35
	b 2b

do_align:
	/* Is the first string word-aligned?  */
	ands ip, r0, #3
	beq word_aligned_r0

	/* Fast compare byte by byte until the first string is word-aligned.  */
	/* The offset of r0 from a word boundary is in ip.  Thus, the number of bytes
	   to read until the next word boundary is 4-ip.  */
	bic r0, r0, #3
	ldr r2, [r0], #4
	lsls ip, ip, #31                /* Z/C now encode the 2 offset bits.  */
	beq byte2                       /* Offset was 2: compare 2 bytes.  */
	bcs byte3                       /* Offset was 3: compare 1 byte.  */

	/* Offset was 1: compare 3 bytes; each step extracts the next byte of
	   the pre-loaded word r2 with UXTB+ROR and checks it against s2.  */
byte1:
	ldrb ip, [r1], #1
	uxtb r3, r2, ror #BYTE1_OFFSET
	subs ip, r3, ip
	bne fast_return
	m_cbz reg=r3, label=fast_return

byte2:
	ldrb ip, [r1], #1
	uxtb r3, r2, ror #BYTE2_OFFSET
	subs ip, r3, ip
	bne fast_return
	m_cbz reg=r3, label=fast_return

byte3:
	ldrb ip, [r1], #1
	uxtb r3, r2, ror #BYTE3_OFFSET
	subs ip, r3, ip
	bne fast_return
	m_cbnz reg=r3, label=word_aligned_r0

fast_return:
	mov r0, ip
	bx lr

word_aligned_r0:
	init
	/* The first string is word-aligned.  */
	/* Is the second string word-aligned?  */
	ands ip, r1, #3
	bne strcmp_unaligned

word_aligned:
	/* The strings are word-aligned.  */
	/* Is the first string double-word aligned?  */
	tst r0, #4
	beq doubleword_aligned_r0

	/* If r0 is not double-word aligned yet, align it by loading
	   and comparing the next word from each string.  */
	ldr r2, [r0], #4
	ldr r4, [r1], #4
	magic_compare_and_branch w1=r2 w2=r4 label=return_24

doubleword_aligned_r0:
	/* Get here when r0 is double-word aligned.  */
	/* Is r1 doubleword_aligned?  */
	tst r1, #4
	beq doubleword_aligned

	/* Get here when the strings to compare are word-aligned,
	   r0 is double-word aligned, but r1 is not double-word aligned.  */

	/* Initialize the queue.  */
	ldr r5, [r1], #4

	/* Compare two words in every iteration.  */
	.p2align 2
3:
/*
	optpld r0, #16
	optpld r1, #16
*/

	/* Load the next double-word from each string and compare.
	   The second LDRD of s2 happens only after the queued word r5
	   matched, so we never read past a terminator across a page.  */
	ldrd r2, r3, [r0], #8
	magic_compare_and_branch w1=r2 w2=r5 label=return_25
	ldrd r4, r5, [r1], #8
	magic_compare_and_branch w1=r3 w2=r4 label=return_34
	b 3b

	.macro miscmp_word offsetlo offsethi
	/* Macro to compare misaligned strings.  */
	/* r0, r1 are word-aligned, and at least one of the strings
	   is not double-word aligned.  */
	/* Compare one word in every loop iteration.  */
	/* OFFSETLO is the original bit-offset of r1 from a word-boundary,
	   OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */

	/* Initialize the shift queue.  */
	ldr r5, [r1], #4

	/* Compare one word from each string in every loop iteration.  */
	.p2align 2
7:
	ldr r3, [r0], #4
	S2LOMEM r5, r5, #\offsetlo      /* Discard already-consumed bytes of the queue.  */
	magic_find_zero_bytes w1=r3
	cmp r7, ip, S2HIMEM #\offsetlo  /* Any zero byte in the low part of r3?  */
	and r2, r3, r6, S2LOMEM #\offsetlo
	it eq
	cmpeq r2, r5                    /* Low part of r3 vs. queued part of s2.  */
	bne return_25
	ldr r5, [r1], #4                /* Safe: low part matched and had no NUL.  */
	cmp ip, #0                      /* Any zero byte anywhere in r3?  */
	eor r3, r2, r3                  /* Keep only the high part of r3.  */
	S2HIMEM r2, r5, #\offsethi
	it eq
	cmpeq r3, r2                    /* High part of r3 vs. new word of s2.  */
	bne return_32
	b 7b
	.endm /* miscmp_word */

strcmp_unaligned:
	/* r0 is word-aligned, r1 is at offset ip from a word.  */
	/* Align r1 to the (previous) word-boundary.  */
	bic r1, r1, #3

	/* Unaligned comparison word by word using LDRs.  */
	cmp ip, #2
	beq miscmp_word_16 /* If ip == 2.  */
	bge miscmp_word_24 /* If ip == 3.  */
	miscmp_word offsetlo=8 offsethi=24 /* If ip == 1.  */
miscmp_word_16: miscmp_word offsetlo=16 offsethi=16
miscmp_word_24: miscmp_word offsetlo=24 offsethi=8


	/* return_XY: the mismatching pair of words is in rX (from s1)
	   and rY (from s2); ip holds the zero-byte syndrome of rX.  */
return_32:
	setup_return w1=r3, w2=r2
	b do_return
return_34:
	setup_return w1=r3, w2=r4
	b do_return
return_25:
	setup_return w1=r2, w2=r5
	b do_return
return_35:
	setup_return w1=r3, w2=r5
	b do_return
return_24:
	setup_return w1=r2, w2=r4

do_return:

#ifdef __ARMEB__
	mov r0, ip
#else /* not __ARMEB__ */
	rev r0, ip
#endif /* not __ARMEB__ */

	/* Restore temporaries early, before computing the return value.  */
	ldrd r6, r7, [sp]
	ldrd r4, r5, [sp, #8]
	adds sp, sp, #16

	/* There is a zero or a different byte between r1 and r2.  */
	/* r0 contains a mask of all-zero bytes in r1.  */
	/* Using r0 and not ip here because cbz requires low register.  */
	m_cbz reg=r0, label=compute_return_value
	clz r0, r0
	/* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
	rsb r0, r0, #24
	/* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
	lsr r1, r1, r0                  /* Drop everything after the NUL so the  */
	lsr r2, r2, r0                  /* comparison below ignores those bytes.  */

compute_return_value:
	movs r0, #1
	cmp r1, r2
	/* The return value is computed as follows.
	   If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
	   If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
	   which means r0:=r0-r0-1 and r0 is #-1 at return.
	   If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
	   which means r0:=r0-r0 and r0 is #0 at return.
	   (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
	it ls
	sbcls r0, r0, r0
	bx lr


#else /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)
	   defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
	   (defined (__thumb__) && !defined (__thumb2__))) */

	/* Use LDR whenever possible.  */

/* In Thumb-2 the 0x01010101/0x80808080 magic constants fit in an
   immediate; in ARM mode they must live in a register (r4).  */
#ifdef __thumb2__
#define magic1(REG) 0x01010101
#define magic2(REG) 0x80808080
#else
#define magic1(REG) REG
#define magic2(REG) REG, lsl #7
#endif

	optpld r0
	optpld r1
	eor r2, r0, r1
	tst r2, #3
	/* Strings not at same byte offset from a word boundary.  */
	bne strcmp_unaligned
	ands r2, r0, #3
	bic r0, r0, #3
	bic r1, r1, #3
	ldr ip, [r0], #4
	it eq
	ldreq r3, [r1], #4
	beq 1f
	/* Although s1 and s2 have identical initial alignment, they are
	   not currently word aligned.  Rather than comparing bytes,
	   make sure that any bytes fetched from before the addressed
	   bytes are forced to 0xff.  Then they will always compare
	   equal.  */
	eor r2, r2, #3
	lsl r2, r2, #3
	mvn r3, MSB
	S2LOMEM r2, r3, r2
	ldr r3, [r1], #4
	orr ip, ip, r2
	orr r3, r3, r2
1:
#ifndef __thumb2__
	/* Load the 'magic' constant 0x01010101.  */
	str r4, [sp, #-4]!
	mov r4, #1
	orr r4, r4, r4, lsl #8
	orr r4, r4, r4, lsl #16
#endif
	.p2align 2
4:
	optpld r0, #8
	optpld r1, #8
	sub r2, ip, magic1(r4)
	cmp ip, r3
	itttt eq
	/* check for any zero bytes in first word */
	biceq r2, r2, ip
	tsteq r2, magic2(r4)
	ldreq ip, [r0], #4
	ldreq r3, [r1], #4
	beq 4b
2:
	/* There's a zero or a different byte in the word */
	S2HIMEM r0, ip, #24
	S2LOMEM ip, ip, #8
	cmp r0, #1
	it cs
	cmpcs r0, r3, S2HIMEM #24
	it eq
	S2LOMEMEQ r3, r3, #8
	beq 2b
	/* On a big-endian machine, r0 contains the desired byte in bits
	   0-7; on a little-endian machine they are in bits 24-31.  In
	   both cases the other bits in r0 are all zero.  For r3 the
	   interesting byte is at the other end of the word, but the
	   other bits are not necessarily zero.  We need a signed result
	   representing the difference in the unsigned bytes, so for the
	   little-endian case we can't just shift the interesting bits
	   up.  */
#ifdef __ARMEB__
	sub r0, r0, r3, lsr #24
#else
	and r3, r3, #255
#ifdef __thumb2__
	/* No RSB instruction in Thumb2 */
	lsr r0, r0, #24
	sub r0, r0, r3
#else
	rsb r0, r3, r0, lsr #24
#endif
#endif
#ifndef __thumb2__
	ldr r4, [sp], #4
#endif
	RETURN


strcmp_unaligned:

#if 0
	/* The assembly code below is based on the following algorithm.  */
#ifdef __ARMEB__
#define RSHIFT <<
#define LSHIFT >>
#else
#define RSHIFT >>
#define LSHIFT <<
#endif

#define body(shift) \
  mask = 0xffffffffU RSHIFT shift; \
  w1 = *wp1++; \
  w2 = *wp2++; \
  do \
    { \
      t1 = w1 & mask; \
      if (__builtin_expect(t1 != w2 RSHIFT shift, 0)) \
	{ \
	  w2 RSHIFT= shift; \
	  break; \
	} \
      if (__builtin_expect(((w1 - b1) & ~w1) & (b1 << 7), 0)) \
	{ \
	  /* See comment in assembler below re syndrome on big-endian */\
	  if ((((w1 - b1) & ~w1) & (b1 << 7)) & mask) \
	    w2 RSHIFT= shift; \
	  else \
	    { \
	      w2 = *wp2; \
	      t1 = w1 RSHIFT (32 - shift); \
	      w2 = (w2 LSHIFT (32 - shift)) RSHIFT (32 - shift); \
	    } \
	  break; \
	} \
      w2 = *wp2++; \
      t1 ^= w1; \
      if (__builtin_expect(t1 != w2 LSHIFT (32 - shift), 0)) \
	{ \
	  t1 = w1 >> (32 - shift); \
	  w2 = (w2 << (32 - shift)) RSHIFT (32 - shift); \
	  break; \
	} \
      w1 = *wp1++; \
    } while (1)

  const unsigned* wp1;
  const unsigned* wp2;
  unsigned w1, w2;
  unsigned mask;
  unsigned shift;
  unsigned b1 = 0x01010101;
  char c1, c2;
  unsigned t1;

  while (((unsigned) s1) & 3)
    {
      c1 = *s1++;
      c2 = *s2++;
      if (c1 == 0 || c1 != c2)
	return c1 - (int)c2;
    }
  wp1 = (unsigned*) (((unsigned)s1) & ~3);
  wp2 = (unsigned*) (((unsigned)s2) & ~3);
  t1 = ((unsigned) s2) & 3;
  if (t1 == 1)
    {
      body(8);
    }
  else if (t1 == 2)
    {
      body(16);
    }
  else
    {
      body (24);
    }

  do
    {
#ifdef __ARMEB__
      c1 = (char) t1 >> 24;
      c2 = (char) w2 >> 24;
#else /* not __ARMEB__ */
      c1 = (char) t1;
      c2 = (char) w2;
#endif /* not __ARMEB__ */
      t1 RSHIFT= 8;
      w2 RSHIFT= 8;
    } while (c1 != 0 && c1 == c2);
  return c1 - c2;
#endif /* 0 */


	/* Register roles for the unaligned path below.  */
	wp1 .req r0
	wp2 .req r1
	b1 .req r2
	w1 .req r4
	w2 .req r5
	t1 .req ip
	@ r3 is scratch

	/* First of all, compare bytes until wp1(sp1) is word-aligned.  */
1:
	tst wp1, #3
	beq 2f
	ldrb r2, [wp1], #1
	ldrb r3, [wp2], #1
	cmp r2, #1
	it cs
	cmpcs r2, r3
	beq 1b
	sub r0, r2, r3
	RETURN

2:
	str r5, [sp, #-4]!
	str r4, [sp, #-4]!
	//stmfd sp!, {r4, r5}
	mov b1, #1
	orr b1, b1, b1, lsl #8
	orr b1, b1, b1, lsl #16

	and t1, wp2, #3
	bic wp2, wp2, #3
	ldr w1, [wp1], #4
	ldr w2, [wp2], #4
	cmp t1, #2
	beq 2f
	bhi 3f

	/* Critical inner Loop: Block with 3 bytes initial overlap */
	.p2align 2
1:
	bic t1, w1, MSB
	cmp t1, w2, S2LOMEM #8
	sub r3, w1, b1
	bic r3, r3, w1
	bne 4f
	ands r3, r3, b1, lsl #7         /* r3 = zero-byte syndrome of w1.  */
	it eq
	ldreq w2, [wp2], #4
	bne 5f
	eor t1, t1, w1
	cmp t1, w2, S2HIMEM #24
	bne 6f
	ldr w1, [wp1], #4
	b 1b
4:
	S2LOMEM w2, w2, #8
	b 8f

5:
#ifdef __ARMEB__
	/* The syndrome value may contain false ones if the string ends
	   with the bytes 0x01 0x00 */
	tst w1, #0xff000000
	itt ne
	tstne w1, #0x00ff0000
	tstne w1, #0x0000ff00
	beq 7f
#else
	bics r3, r3, #0xff000000
	bne 7f
#endif
	ldrb w2, [wp2]
	S2LOMEM t1, w1, #24
#ifdef __ARMEB__
	lsl w2, w2, #24
#endif
	b 8f

6:
	S2LOMEM t1, w1, #24
	and w2, w2, LSB
	b 8f

	/* Critical inner Loop: Block with 2 bytes initial overlap */
	.p2align 2
2:
	S2HIMEM t1, w1, #16
	sub r3, w1, b1
	S2LOMEM t1, t1, #16
	bic r3, r3, w1
	cmp t1, w2, S2LOMEM #16
	bne 4f
	ands r3, r3, b1, lsl #7
	it eq
	ldreq w2, [wp2], #4
	bne 5f
	eor t1, t1, w1
	cmp t1, w2, S2HIMEM #16
	bne 6f
	ldr w1, [wp1], #4
	b 2b

5:
#ifdef __ARMEB__
	/* The syndrome value may contain false ones if the string ends
	   with the bytes 0x01 0x00 */
	tst w1, #0xff000000
	it ne
	tstne w1, #0x00ff0000
	beq 7f
#else
	lsls r3, r3, #16
	bne 7f
#endif
	ldrh w2, [wp2]
	S2LOMEM t1, w1, #16
#ifdef __ARMEB__
	lsl w2, w2, #16
#endif
	b 8f

6:
	S2HIMEM w2, w2, #16
	S2LOMEM t1, w1, #16
4:
	S2LOMEM w2, w2, #16
	b 8f

	/* Critical inner Loop: Block with 1 byte initial overlap */
	.p2align 2
3:
	and t1, w1, LSB
	cmp t1, w2, S2LOMEM #24
	sub r3, w1, b1
	bic r3, r3, w1
	bne 4f
	ands r3, r3, b1, lsl #7
	it eq
	ldreq w2, [wp2], #4
	bne 5f
	eor t1, t1, w1
	cmp t1, w2, S2HIMEM #8
	bne 6f
	ldr w1, [wp1], #4
	b 3b
4:
	S2LOMEM w2, w2, #24
	b 8f
5:
	/* The syndrome value may contain false ones if the string ends
	   with the bytes 0x01 0x00 */
	tst w1, LSB
	beq 7f
	ldr w2, [wp2], #4
6:
	S2LOMEM t1, w1, #8
	bic w2, w2, MSB
	b 8f
7:
	/* Strings compared equal (terminator reached with no mismatch).  */
	mov r0, #0
	//ldmfd sp!, {r4, r5}
	ldr r4, [sp], #4
	ldr r5, [sp], #4
	RETURN
8:
	/* Byte-by-byte tail: walk t1 (from s1) and w2 (from s2) to find
	   the exact differing/terminating byte and form the result.  */
	and r2, t1, LSB
	and r0, w2, LSB
	cmp r0, #1
	it cs
	cmpcs r0, r2
	itt eq
	S2LOMEMEQ t1, t1, #8
	S2LOMEMEQ w2, w2, #8
	beq 8b
	sub r0, r2, r0
	//ldmfd sp!, {r4, r5}
	ldr r4, [sp], #4
	ldr r5, [sp], #4
	RETURN

#endif /* !(defined (_ISA_THUMB_2) || defined (_ISA_ARM_6)
	    defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) ||
	    (defined (__thumb__) && !defined (__thumb2__))) */