/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax: GNU as, AT&T operand order, i386, preprocessed with cpp.
 */

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc			.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc			.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)		.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* ENTRANCE saves %esi/%edi.  RETURN restores them and returns; the
   CFI_PUSH pair after the ret re-establishes the unwind state for
   whatever code follows the RETURN in the file.  Note that RETURN
   already ends with a `;'.  */
#define ENTRANCE	PUSH(%esi); PUSH(%edi)
#define RETURN		POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);

/* Stack offsets of the two arguments on entry (before ENTRANCE).  */
#define PARMS		4
#define STR1		PARMS
#define STR2		STR1+4

/*
 * Building blocks.  Each macro expands to exactly the instruction
 * sequence that the routine uses over and over; OFF is a byte offset.
 * An OFF of 0 produces a `0(%reg)' operand, which denotes the same
 * address as `(%reg)'.
 */

/* Prologue compare (before ENTRANCE): s1 is in %edx, s2 in %eax.
   Leaves via L(neq) on a difference, via L(eq) on a shared terminator.  */
#define CMP_WCHAR_HEAD(OFF)		\
	mov	OFF(%eax), %ecx;	\
	cmp	%ecx, OFF(%edx);	\
	jne	L(neq);			\
	test	%ecx, %ecx;		\
	jz	L(eq)

/* Main-body compare: s1 is in %edi, s2 in %esi.
   Leaves via L(nequal) on a difference, via L(equal) on a shared
   terminator.  Clobbers %ecx.  */
#define CMP_WCHAR(OFF)			\
	mov	OFF(%esi), %ecx;	\
	cmp	%ecx, OFF(%edi);	\
	jne	L(nequal);		\
	test	%ecx, %ecx;		\
	jz	L(equal)

/* Compare one wide char with no terminator test; used only after a
   vector check has shown the 16-byte group holds no terminator.
   Clobbers %eax.  */
#define CMP_WCHAR_NONNUL(OFF)		\
	mov	OFF(%edi), %eax;	\
	cmp	OFF(%esi), %eax;	\
	jne	L(nequal)

/* Final decision on the wide char at OFF: L(nequal) if the strings
   differ there, otherwise return with whatever is in %eax (callers
   zero it first).  */
#define CMP_WCHAR_RETURN(OFF)		\
	mov	OFF(%esi), %ecx;	\
	cmp	%ecx, OFF(%edi);	\
	jne	L(nequal);		\
	RETURN

/*
 * Core of every 16-byte check.  On entry X holds four wide chars of one
 * string, OTHER (an xmm register or a 16-byte-aligned memory operand)
 * names the matching four of the other string, and %xmm0 is all-zero.
 *
 *   pcmpeqd X, %xmm0    -> %xmm0 = 0xff bytes where X has a terminator
 *   pcmpeqd OTHER, X    -> X     = 0xff bytes where the strings agree
 *   psubb   %xmm0, X    -> byte sign bit set only for "agree and not NUL"
 *   pmovmskb            -> 0xffff iff all four agree and none is NUL
 *
 * Falls through with %edx == 0 and %xmm0 still all-zero; otherwise
 * jumps to FAIL with %edx = mask - 0xffff (non-zero).
 */
#define CHECK16_TAIL(X, OTHER, FAIL)	\
	pcmpeqd	X, %xmm0;		\
	pcmpeqd	OTHER, X;		\
	psubb	%xmm0, X;		\
	pmovmskb X, %edx;		\
	sub	$0xffff, %edx;		\
	jnz	FAIL

/* Both strings loaded with unaligned loads; XA/XB are scratch.  */
#define CHECK16_UU_REGS(OFF, XA, XB, FAIL)	\
	movdqu	OFF(%edi), XA;			\
	movdqu	OFF(%esi), XB;			\
	CHECK16_TAIL(XA, XB, FAIL)

#define CHECK16_UU(OFF, FAIL)		\
	CHECK16_UU_REGS(OFF, %xmm1, %xmm2, FAIL)

/* s1 (%edi) unaligned, s2 (%esi) 16-byte aligned.  */
#define CHECK16_S1U(OFF, FAIL)		\
	movdqu	OFF(%edi), %xmm1;	\
	CHECK16_TAIL(%xmm1, OFF(%esi), FAIL)

/* s1 (%edi) 16-byte aligned, s2 (%esi) unaligned.  */
#define CHECK16_S2U(OFF, FAIL)		\
	movdqu	OFF(%esi), %xmm2;	\
	CHECK16_TAIL(%xmm2, OFF(%edi), FAIL)

/* Both strings 16-byte aligned; X is scratch.  */
#define CHECK16_AA(OFF, X, FAIL)	\
	movdqa	OFF(%edi), X;		\
	CHECK16_TAIL(X, OFF(%esi), FAIL)

/*
 * wcscmp: compare two NUL-terminated strings of 32-bit wide characters.
 *
 * In:    STR1(%esp) = s1, STR2(%esp) = s2 (both read from the stack).
 * Out:   %eax = 0 if the strings are equal, 1 if the first differing
 *        element of s1 is greater (signed 32-bit compare, `jg'),
 *        -1 otherwise.
 * Saved: %esi, %edi (pushed by ENTRANCE, popped by RETURN).
 * Clobb: %eax, %ecx, %edx, %xmm0-%xmm5, flags.
 *
 * This implementation uses SSE to compare up to 16 bytes at a time.
 *
 * The first four wide chars are compared one by one.  After that,
 * %edi = s1 + 16 and %esi = s2 + 16, and the code dispatches on where
 * each pointer sits inside a 64-byte line:
 *
 *   "00"            the pointer is 16-byte aligned;
 *   "0/16/32/48"    the pointer is not 16-byte aligned and its offset in
 *                   the line is < 16, < 32, < 48, or >= 48.
 *
 * L(continue_X) is keyed on s1.  In L(continue_X_Y) a "00" is
 * positional (continue_00_Y: s1 aligned; continue_X_00: s2 aligned);
 * when both pointers are unaligned the pair is unordered, e.g.
 * L(continue_0_48) also serves s1 in quarter 48 with s2 in quarter 0.
 * Each variant compares scalar-wise exactly those 16-byte groups that
 * would straddle a 64-byte boundary for one of the pointers (this holds
 * for 4-byte-aligned inputs, which the code appears to assume), and
 * uses 16-byte loads for the rest.  The shorter variants peel 16, 32 or
 * 48 bytes and then join one of the looping variants.
 */
	.text
ENTRY (wcscmp)
	mov	STR1(%esp), %edx	/* edx = s1 */
	mov	STR2(%esp), %eax	/* eax = s2 */

	CMP_WCHAR_HEAD (0)
	CMP_WCHAR_HEAD (4)
	CMP_WCHAR_HEAD (8)
	CMP_WCHAR_HEAD (12)

	ENTRANCE
	add	$16, %eax
	add	$16, %edx

	mov	%eax, %esi		/* esi = s2 cursor */
	mov	%edx, %edi		/* edi = s1 cursor */
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* ch = low byte of s2 cursor */
	mov	%dl, %cl		/* cl = low byte of s1 cursor */
	and	$63, %eax		/* esi alignment in cache line */
	and	$63, %edx		/* edi alignment in cache line */
	and	$15, %cl
	jz	L(continue_00)		/* s1 is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	and	$15, %ch
	jz	L(continue_48_00)
	cmp	$16, %eax
	jb	L(continue_0_48)
	cmp	$32, %eax
	jb	L(continue_16_48)
	cmp	$48, %eax
	jb	L(continue_32_48)

	.p2align 4
L(continue_48_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CHECK16_UU (16, L(less4_double_words_16))
	CHECK16_UU (32, L(less4_double_words_32))
	CHECK16_UU (48, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_48)

L(continue_0):
	and	$15, %ch
	jz	L(continue_0_00)
	cmp	$16, %eax
	jb	L(continue_0_0)
	cmp	$32, %eax
	jb	L(continue_0_16)
	cmp	$48, %eax
	jb	L(continue_0_32)

	.p2align 4
L(continue_0_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CHECK16_UU (16, L(less4_double_words_16))
	CHECK16_UU (32, L(less4_double_words_32))

	CMP_WCHAR (48)
	CMP_WCHAR (52)
	CMP_WCHAR (56)
	CMP_WCHAR (60)

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	and	$15, %ch
	jz	L(continue_00_00)
	cmp	$16, %eax
	jb	L(continue_00_0)
	cmp	$32, %eax
	jb	L(continue_00_16)
	cmp	$48, %eax
	jb	L(continue_00_32)

	.p2align 4
L(continue_00_48):
	/* Look for a terminator in the aligned s1 group; if there is one,
	   finish scalar-wise.  Otherwise xmm0 is still all-zero.  */
	pcmpeqd	(%edi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmp	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONNUL (4)
	CMP_WCHAR_NONNUL (8)
	CMP_WCHAR_NONNUL (12)

	CHECK16_S2U (16, L(less4_double_words_16))
	CHECK16_S2U (32, L(less4_double_words_32))
	CHECK16_S2U (48, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	and	$15, %ch
	jz	L(continue_32_00)
	cmp	$16, %eax
	jb	L(continue_0_32)
	cmp	$32, %eax
	jb	L(continue_16_32)
	cmp	$48, %eax
	jb	L(continue_32_32)

	.p2align 4
L(continue_32_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CMP_WCHAR (16)
	CMP_WCHAR (20)
	CMP_WCHAR (24)
	CMP_WCHAR (28)

	CHECK16_UU (32, L(less4_double_words_32))
	CHECK16_UU (48, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	and	$15, %ch
	jz	L(continue_16_00)
	cmp	$16, %eax
	jb	L(continue_0_16)
	cmp	$32, %eax
	jb	L(continue_16_16)
	cmp	$48, %eax
	jb	L(continue_16_32)

	.p2align 4
L(continue_16_48):
	CMP_WCHAR (0)
	CMP_WCHAR (4)
	CMP_WCHAR (8)
	CMP_WCHAR (12)

	CHECK16_UU (16, L(less4_double_words_16))

	CMP_WCHAR (32)
	CMP_WCHAR (36)
	CMP_WCHAR (40)
	CMP_WCHAR (44)

	CHECK16_UU (48, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_00_00):
	CHECK16_AA (0, %xmm1, L(less4_double_words))
	CHECK16_AA (16, %xmm3, L(less4_double_words_16))
	CHECK16_AA (32, %xmm5, L(less4_double_words_32))
	CHECK16_AA (48, %xmm1, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_00_00)

	.p2align 4
L(continue_00_32):
	CHECK16_S2U (0, L(less4_double_words))

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	CHECK16_S2U (0, L(less4_double_words))
	CHECK16_S2U (16, L(less4_double_words_16))

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	CHECK16_S2U (0, L(less4_double_words))
	CHECK16_S2U (16, L(less4_double_words_16))
	CHECK16_S2U (32, L(less4_double_words_32))

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	/* Mirror image of L(continue_00_48): here s2 is the aligned one,
	   so the terminator probe reads the s2 group.  */
	pcmpeqd	(%esi), %xmm0
	mov	(%edi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)

	cmp	(%esi), %eax
	jne	L(nequal)

	CMP_WCHAR_NONNUL (4)
	CMP_WCHAR_NONNUL (8)
	CMP_WCHAR_NONNUL (12)

	CHECK16_S1U (16, L(less4_double_words_16))
	CHECK16_S1U (32, L(less4_double_words_32))
	CHECK16_S1U (48, L(less4_double_words_48))

	add	$64, %esi
	add	$64, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_00):
	CHECK16_S1U (0, L(less4_double_words))

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	CHECK16_S1U (0, L(less4_double_words))
	CHECK16_S1U (16, L(less4_double_words_16))

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	CHECK16_S1U (0, L(less4_double_words))
	CHECK16_S1U (16, L(less4_double_words_16))
	CHECK16_S1U (32, L(less4_double_words_32))

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_32):
	CHECK16_UU (0, L(less4_double_words))

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	CHECK16_UU (0, L(less4_double_words))
	CHECK16_UU_REGS (16, %xmm3, %xmm4, L(less4_double_words_16))

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	CHECK16_UU (0, L(less4_double_words))
	CHECK16_UU_REGS (16, %xmm3, %xmm4, L(less4_double_words_16))
	CHECK16_UU (32, L(less4_double_words_32))

	add	$48, %esi
	add	$48, %edi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	CHECK16_UU (0, L(less4_double_words))
	CHECK16_UU (16, L(less4_double_words_16))

	add	$32, %esi
	add	$32, %edi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	CHECK16_UU (0, L(less4_double_words))

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	CHECK16_UU (0, L(less4_double_words))

	add	$16, %esi
	add	$16, %edi
	jmp	L(continue_32_48)

/* Reached from L(continue_00_48) / L(continue_48_00) when the aligned
   string has a terminator somewhere in the current four wide chars.
   %eax already holds the first wide char of s1.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%esi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	CMP_WCHAR (4)
	CMP_WCHAR (8)

	mov	12(%esi), %ecx
	cmp	%ecx, 12(%edi)
	jne	L(nequal)
	xor	%eax, %eax
	RETURN

/*
 * Tails for a failed 16-byte check at offset 0 / 16 / 32 / 48.
 * %edx = mask - 0xffff, where each wide char owns one nibble of the
 * mask.  The lowest non-zero nibble of %dx marks the first wide char
 * that either differs or is the terminator: %dl == 0 means it is in the
 * upper pair, and the low nibble of %dl (resp. %dh) picks within the
 * pair.  That single element is then compared; if it is equal it must
 * be the terminator, and 0 is returned.
 */
	.p2align 4
L(less4_double_words):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	CMP_WCHAR_RETURN (0)

	.p2align 4
L(second_double_word):
	CMP_WCHAR_RETURN (4)

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	CMP_WCHAR_RETURN (8)

	.p2align 4
L(fourth_double_word):
	CMP_WCHAR_RETURN (12)

	.p2align 4
L(less4_double_words_16):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	CMP_WCHAR_RETURN (16)

	.p2align 4
L(second_double_word_16):
	CMP_WCHAR_RETURN (20)

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	CMP_WCHAR_RETURN (24)

	.p2align 4
L(fourth_double_word_16):
	CMP_WCHAR_RETURN (28)

	.p2align 4
L(less4_double_words_32):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	CMP_WCHAR_RETURN (32)

	.p2align 4
L(second_double_word_32):
	CMP_WCHAR_RETURN (36)

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	CMP_WCHAR_RETURN (40)

	.p2align 4
L(fourth_double_word_32):
	CMP_WCHAR_RETURN (44)

	.p2align 4
L(less4_double_words_48):
	xor	%eax, %eax
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	CMP_WCHAR_RETURN (48)

	.p2align 4
L(second_double_word_48):
	CMP_WCHAR_RETURN (52)

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	CMP_WCHAR_RETURN (56)

	.p2align 4
L(fourth_double_word_48):
	CMP_WCHAR_RETURN (60)

/* Strings differ.  The flags are still those of the `cmp' that branched
   here (s1 element minus s2 element); `mov' leaves them untouched.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(return)		/* signed: s1 element > s2 element */
	neg	%eax
	RETURN

	.p2align 4
L(return):
	RETURN

	.p2align 4
L(equal):
	xorl	%eax, %eax
	RETURN

/* The exits below are only reached from the prologue, before ENTRANCE,
   so %esi/%edi are not on the stack there.  */
	CFI_POP (%edi)
	CFI_POP (%esi)

	.p2align 4
L(neq):
	mov	$1, %eax
	jg	L(neq_bigger)		/* signed: s1 element > s2 element */
	neg	%eax

L(neq_bigger):
	ret

	.p2align 4
L(eq):
	xorl	%eax, %eax
	ret

END (wcscmp)