1 /* 2 * Copyright (c) 2013 ARM Ltd 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the company may not be used to endorse or promote 14 * products derived from this software without specific prior written 15 * permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED 18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <private/bionic_asm.h> 30 31 #ifdef __ARMEB__ 32 #define S2LOMEM lsl 33 #define S2LOMEMEQ lsleq 34 #define S2HIMEM lsr 35 #define MSB 0x000000ff 36 #define LSB 0xff000000 37 #define BYTE0_OFFSET 24 38 #define BYTE1_OFFSET 16 39 #define BYTE2_OFFSET 8 40 #define BYTE3_OFFSET 0 41 #else /* not __ARMEB__ */ 42 #define S2LOMEM lsr 43 #define S2LOMEMEQ lsreq 44 #define S2HIMEM lsl 45 #define BYTE0_OFFSET 0 46 #define BYTE1_OFFSET 8 47 #define BYTE2_OFFSET 16 48 #define BYTE3_OFFSET 24 49 #define MSB 0xff000000 50 #define LSB 0x000000ff 51 #endif /* not __ARMEB__ */ 52 53 .syntax unified 54 55 #if defined (__thumb__) 56 .thumb 57 .thumb_func 58 #endif 59 60 ENTRY(strcmp) 61 /* Use LDRD whenever possible. */ 62 63 /* The main thing to look out for when comparing large blocks is that 64 the loads do not cross a page boundary when loading past the index 65 of the byte with the first difference or the first string-terminator. 66 67 For example, if the strings are identical and the string-terminator 68 is at index k, byte by byte comparison will not load beyond address 69 s1+k and s2+k; word by word comparison may load up to 3 bytes beyond 70 k; double word - up to 7 bytes. If the load of these bytes crosses 71 a page boundary, it might cause a memory fault (if the page is not mapped) 72 that would not have happened in byte by byte comparison. 73 74 If an address is (double) word aligned, then a load of a (double) word 75 from that address will not cross a page boundary. 76 Therefore, the algorithm below considers word and double-word alignment 77 of strings separately. */ 78 79 /* High-level description of the algorithm. 80 81 * The fast path: if both strings are double-word aligned, 82 use LDRD to load two words from each string in every loop iteration. 83 * If the strings have the same offset from a word boundary, 84 use LDRB to load and compare byte by byte until 85 the first string is aligned to a word boundary (at most 3 bytes). 86 This is optimized for quick return on short unaligned strings. 87 * If the strings have the same offset from a double-word boundary, 88 use LDRD to load two words from each string in every loop iteration, as in the fast path. 89 * If the strings do not have the same offset from a double-word boundary, 90 load a word from the second string before the loop to initialize the queue. 91 Use LDRD to load two words from every string in every loop iteration. 92 Inside the loop, load the second word from the second string only after comparing 93 the first word, using the queued value, to guarantee safety across page boundaries. 94 * If the strings do not have the same offset from a word boundary, 95 use LDR and a shift queue. Order of loads and comparisons matters, 96 similarly to the previous case. 97 98 * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value. 99 * The only difference between ARM and Thumb modes is the use of CBZ instruction. 100 * The only difference between big and little endian is the use of REV in little endian 101 to compute the return value, instead of MOV. 102 */ 103 104 .macro m_cbz reg label 105 #ifdef __thumb2__ 106 cbz \reg, \label 107 #else /* not defined __thumb2__ */ 108 cmp \reg, #0 109 beq \label 110 #endif /* not defined __thumb2__ */ 111 .endm /* m_cbz */ 112 113 .macro m_cbnz reg label 114 #ifdef __thumb2__ 115 cbnz \reg, \label 116 #else /* not defined __thumb2__ */ 117 cmp \reg, #0 118 bne \label 119 #endif /* not defined __thumb2__ */ 120 .endm /* m_cbnz */ 121 122 .macro init 123 /* Macro to save temporary registers and prepare magic values. */ 124 subs sp, sp, #16 125 .cfi_def_cfa_offset 16 126 strd r4, r5, [sp, #8] 127 .cfi_rel_offset r4, 0 128 .cfi_rel_offset r5, 4 129 strd r6, r7, [sp] 130 .cfi_rel_offset r6, 8 131 .cfi_rel_offset r7, 12 132 mvn r6, #0 /* all F */ 133 mov r7, #0 /* all 0 */ 134 .endm /* init */ 135 136 .macro magic_compare_and_branch w1 w2 label 137 /* Macro to compare registers w1 and w2 and conditionally branch to label. */ 138 cmp \w1, \w2 /* Are w1 and w2 the same? */ 139 magic_find_zero_bytes \w1 140 it eq 141 cmpeq ip, #0 /* Is there a zero byte in w1? */ 142 bne \label 143 .endm /* magic_compare_and_branch */ 144 145 .macro magic_find_zero_bytes w1 146 /* Macro to find all-zero bytes in w1, result is in ip. */ 147 uadd8 ip, \w1, r6 148 sel ip, r7, r6 149 .endm /* magic_find_zero_bytes */ 150 151 .macro setup_return w1 w2 152 #ifdef __ARMEB__ 153 mov r1, \w1 154 mov r2, \w2 155 #else /* not __ARMEB__ */ 156 rev r1, \w1 157 rev r2, \w2 158 #endif /* not __ARMEB__ */ 159 .endm /* setup_return */ 160 161 pld [r0, #0] 162 pld [r1, #0] 163 164 /* Are both strings double-word aligned? */ 165 orr ip, r0, r1 166 tst ip, #7 167 bne .L_do_align 168 169 /* Fast path. */ 170 init 171 172 .L_doubleword_aligned: 173 174 /* Get here when the strings to compare are double-word aligned. */ 175 /* Compare two words in every iteration. */ 176 .p2align 2 177 2: 178 pld [r0, #16] 179 pld [r1, #16] 180 181 /* Load the next double-word from each string. */ 182 ldrd r2, r3, [r0], #8 183 ldrd r4, r5, [r1], #8 184 185 magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24 186 magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35 187 b 2b 188 189 .L_do_align: 190 /* Is the first string word-aligned? */ 191 ands ip, r0, #3 192 beq .L_word_aligned_r0 193 194 /* Fast compare byte by byte until the first string is word-aligned. */ 195 /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes 196 to read until the next word boundary is 4-ip. */ 197 bic r0, r0, #3 198 ldr r2, [r0], #4 199 lsls ip, ip, #31 200 beq .L_byte2 201 bcs .L_byte3 202 203 .L_byte1: 204 ldrb ip, [r1], #1 205 uxtb r3, r2, ror #BYTE1_OFFSET 206 subs ip, r3, ip 207 bne .L_fast_return 208 m_cbz reg=r3, label=.L_fast_return 209 210 .L_byte2: 211 ldrb ip, [r1], #1 212 uxtb r3, r2, ror #BYTE2_OFFSET 213 subs ip, r3, ip 214 bne .L_fast_return 215 m_cbz reg=r3, label=.L_fast_return 216 217 .L_byte3: 218 ldrb ip, [r1], #1 219 uxtb r3, r2, ror #BYTE3_OFFSET 220 subs ip, r3, ip 221 bne .L_fast_return 222 m_cbnz reg=r3, label=.L_word_aligned_r0 223 224 .L_fast_return: 225 mov r0, ip 226 bx lr 227 228 .L_word_aligned_r0: 229 init 230 /* The first string is word-aligned. */ 231 /* Is the second string word-aligned? */ 232 ands ip, r1, #3 233 bne .L_strcmp_unaligned 234 235 .L_word_aligned: 236 /* The strings are word-aligned. */ 237 /* Is the first string double-word aligned? */ 238 tst r0, #4 239 beq .L_doubleword_aligned_r0 240 241 /* If r0 is not double-word aligned yet, align it by loading 242 and comparing the next word from each string. */ 243 ldr r2, [r0], #4 244 ldr r4, [r1], #4 245 magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24 246 247 .L_doubleword_aligned_r0: 248 /* Get here when r0 is double-word aligned. */ 249 /* Is r1 doubleword_aligned? */ 250 tst r1, #4 251 beq .L_doubleword_aligned 252 253 /* Get here when the strings to compare are word-aligned, 254 r0 is double-word aligned, but r1 is not double-word aligned. */ 255 256 /* Initialize the queue. */ 257 ldr r5, [r1], #4 258 259 /* Compare two words in every iteration. */ 260 .p2align 2 261 3: 262 pld [r0, #16] 263 pld [r1, #16] 264 265 /* Load the next double-word from each string and compare. */ 266 ldrd r2, r3, [r0], #8 267 magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25 268 ldrd r4, r5, [r1], #8 269 magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34 270 b 3b 271 272 .macro miscmp_word offsetlo offsethi 273 /* Macro to compare misaligned strings. */ 274 /* r0, r1 are word-aligned, and at least one of the strings 275 is not double-word aligned. */ 276 /* Compare one word in every loop iteration. */ 277 /* OFFSETLO is the original bit-offset of r1 from a word-boundary, 278 OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word). */ 279 280 /* Initialize the shift queue. */ 281 ldr r5, [r1], #4 282 283 /* Compare one word from each string in every loop iteration. */ 284 .p2align 2 285 7: 286 ldr r3, [r0], #4 287 S2LOMEM r5, r5, #\offsetlo 288 magic_find_zero_bytes w1=r3 289 cmp r7, ip, S2HIMEM #\offsetlo 290 and r2, r3, r6, S2LOMEM #\offsetlo 291 it eq 292 cmpeq r2, r5 293 bne .L_return_25 294 ldr r5, [r1], #4 295 cmp ip, #0 296 eor r3, r2, r3 297 S2HIMEM r2, r5, #\offsethi 298 it eq 299 cmpeq r3, r2 300 bne .L_return_32 301 b 7b 302 .endm /* miscmp_word */ 303 304 .L_return_32: 305 setup_return w1=r3, w2=r2 306 b .L_do_return 307 .L_return_34: 308 setup_return w1=r3, w2=r4 309 b .L_do_return 310 .L_return_25: 311 setup_return w1=r2, w2=r5 312 b .L_do_return 313 .L_return_35: 314 setup_return w1=r3, w2=r5 315 b .L_do_return 316 .L_return_24: 317 setup_return w1=r2, w2=r4 318 319 .L_do_return: 320 321 #ifdef __ARMEB__ 322 mov r0, ip 323 #else /* not __ARMEB__ */ 324 rev r0, ip 325 #endif /* not __ARMEB__ */ 326 327 /* Restore temporaries early, before computing the return value. */ 328 ldrd r6, r7, [sp] 329 ldrd r4, r5, [sp, #8] 330 adds sp, sp, #16 331 .cfi_def_cfa_offset 0 332 .cfi_restore r4 333 .cfi_restore r5 334 .cfi_restore r6 335 .cfi_restore r7 336 337 /* There is a zero or a different byte between r1 and r2. */ 338 /* r0 contains a mask of all-zero bytes in r1. */ 339 /* Using r0 and not ip here because cbz requires low register. */ 340 m_cbz reg=r0, label=.L_compute_return_value 341 clz r0, r0 342 /* r0 contains the number of bits on the left of the first all-zero byte in r1. */ 343 rsb r0, r0, #24 344 /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1. */ 345 lsr r1, r1, r0 346 lsr r2, r2, r0 347 348 .L_compute_return_value: 349 movs r0, #1 350 cmp r1, r2 351 /* The return value is computed as follows. 352 If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return. 353 If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0, 354 which means r0:=r0-r0-1 and r0 is #-1 at return. 355 If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1, 356 which means r0:=r0-r0 and r0 is #0 at return. 357 (C==0 and Z==1) cannot happen because the carry bit is "not borrow". */ 358 it ls 359 sbcls r0, r0, r0 360 bx lr 361 362 /* The code from the previous version of strcmp.S handles all of the 363 * cases where the first string and seconds string cannot both be 364 * aligned to a word boundary faster than the new algorithm. See 365 * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited 366 * version of the code. 367 */ 368 .L_strcmp_unaligned: 369 wp1 .req r0 370 wp2 .req r1 371 b1 .req r2 372 w1 .req r4 373 w2 .req r5 374 t1 .req ip 375 @ r3 is scratch 376 377 2: 378 mov b1, #1 379 orr b1, b1, b1, lsl #8 380 orr b1, b1, b1, lsl #16 381 382 and t1, wp2, #3 383 bic wp2, wp2, #3 384 ldr w1, [wp1], #4 385 ldr w2, [wp2], #4 386 cmp t1, #2 387 beq 2f 388 bhi 3f 389 390 /* Critical inner Loop: Block with 3 bytes initial overlap */ 391 .p2align 2 392 1: 393 bic t1, w1, #MSB 394 cmp t1, w2, S2LOMEM #8 395 sub r3, w1, b1 396 bic r3, r3, w1 397 bne 4f 398 ands r3, r3, b1, lsl #7 399 it eq 400 ldreq w2, [wp2], #4 401 bne 5f 402 eor t1, t1, w1 403 cmp t1, w2, S2HIMEM #24 404 bne 6f 405 ldr w1, [wp1], #4 406 b 1b 407 4: 408 S2LOMEM w2, w2, #8 409 b 8f 410 411 5: 412 #ifdef __ARMEB__ 413 /* The syndrome value may contain false ones if the string ends 414 * with the bytes 0x01 0x00 415 */ 416 tst w1, #0xff000000 417 itt ne 418 tstne w1, #0x00ff0000 419 tstne w1, #0x0000ff00 420 beq 7f 421 #else 422 bics r3, r3, #0xff000000 423 bne 7f 424 #endif 425 ldrb w2, [wp2] 426 S2LOMEM t1, w1, #24 427 #ifdef __ARMEB__ 428 lsl w2, w2, #24 429 #endif 430 b 8f 431 432 6: 433 S2LOMEM t1, w1, #24 434 and w2, w2, #LSB 435 b 8f 436 437 /* Critical inner Loop: Block with 2 bytes initial overlap */ 438 .p2align 2 439 2: 440 S2HIMEM t1, w1, #16 441 sub r3, w1, b1 442 S2LOMEM t1, t1, #16 443 bic r3, r3, w1 444 cmp t1, w2, S2LOMEM #16 445 bne 4f 446 ands r3, r3, b1, lsl #7 447 it eq 448 ldreq w2, [wp2], #4 449 bne 5f 450 eor t1, t1, w1 451 cmp t1, w2, S2HIMEM #16 452 bne 6f 453 ldr w1, [wp1], #4 454 b 2b 455 456 5: 457 #ifdef __ARMEB__ 458 /* The syndrome value may contain false ones if the string ends 459 * with the bytes 0x01 0x00 460 */ 461 tst w1, #0xff000000 462 it ne 463 tstne w1, #0x00ff0000 464 beq 7f 465 #else 466 lsls r3, r3, #16 467 bne 7f 468 #endif 469 ldrh w2, [wp2] 470 S2LOMEM t1, w1, #16 471 #ifdef __ARMEB__ 472 lsl w2, w2, #16 473 #endif 474 b 8f 475 476 6: 477 S2HIMEM w2, w2, #16 478 S2LOMEM t1, w1, #16 479 4: 480 S2LOMEM w2, w2, #16 481 b 8f 482 483 /* Critical inner Loop: Block with 1 byte initial overlap */ 484 .p2align 2 485 3: 486 and t1, w1, #LSB 487 cmp t1, w2, S2LOMEM #24 488 sub r3, w1, b1 489 bic r3, r3, w1 490 bne 4f 491 ands r3, r3, b1, lsl #7 492 it eq 493 ldreq w2, [wp2], #4 494 bne 5f 495 eor t1, t1, w1 496 cmp t1, w2, S2HIMEM #8 497 bne 6f 498 ldr w1, [wp1], #4 499 b 3b 500 4: 501 S2LOMEM w2, w2, #24 502 b 8f 503 5: 504 /* The syndrome value may contain false ones if the string ends 505 * with the bytes 0x01 0x00 506 */ 507 tst w1, #LSB 508 beq 7f 509 ldr w2, [wp2], #4 510 6: 511 S2LOMEM t1, w1, #8 512 bic w2, w2, #MSB 513 b 8f 514 7: 515 mov r0, #0 516 517 /* Restore registers and stack. */ 518 ldrd r6, r7, [sp] 519 ldrd r4, r5, [sp, #8] 520 adds sp, sp, #16 521 .cfi_def_cfa_offset 0 522 .cfi_restore r4 523 .cfi_restore r5 524 .cfi_restore r6 525 .cfi_restore r7 526 527 bx lr 528 529 8: 530 and r2, t1, #LSB 531 and r0, w2, #LSB 532 cmp r0, #1 533 it cs 534 cmpcs r0, r2 535 itt eq 536 S2LOMEMEQ t1, t1, #8 537 S2LOMEMEQ w2, w2, #8 538 beq 8b 539 sub r0, r2, r0 540 541 /* Restore registers and stack. */ 542 ldrd r6, r7, [sp] 543 ldrd r4, r5, [sp, #8] 544 adds sp, sp, #16 545 546 bx lr 547 END(strcmp) 548