/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   Target: 32-bit x86 with SSE2; arguments are read from the stack.  */

/* Assembler-local label helper.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the .cfi_* unwind directives; each one can be
   overridden by defining the macro before this point.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* Open a global, 16-byte aligned function symbol and its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* Close the CFI region and record the symbol size.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* Unwind bookkeeping for one 4-byte push / pop of REG.  These emit
   directives only, no instructions.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Real push / pop together with the matching unwind bookkeeping.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has pushed %edi:
   4 bytes of saved %edi plus 4 bytes of return address.  */
#define PARMS	8
#define ENTRANCE	PUSH(%edi);
/* Restore %edi and return.  The trailing CFI_PUSH re-establishes the
   "%edi is saved" unwind state for whatever code follows textually.  */
#define RETURN	POP(%edi); ret; CFI_PUSH(%edi);

#define STR1	PARMS
#define STR2	STR1+4

/*-----------------------------------------------------------------------
   wcsrchr

   In:    STR1(%esp) = pointer to a string of 32-bit elements terminated
                       by a zero element
          STR2(%esp) = 32-bit element to search for
          (presumably the C prototype
           wchar_t *wcsrchr (const wchar_t *s, wchar_t c) -- confirm
           against the declaring header)
   Out:   %eax = address of the last element equal to the searched value
                 at or before the terminator, or 0 if there is none.
                 Searching for 0 yields the address of the terminator.
   Saved: %edi (always) and %esi (only around the main loop) are pushed
          and restored.
   Clobb: %eax, %ecx, %edx, %xmm0-%xmm5, flags.

   Register roles:
     %edi   scan pointer; after each 16-byte block is loaded it points
            just past that block
     %xmm1  searched value replicated into all four dword lanes
     %xmm2  all-zero while scanning; holds the zero-compare result of
            the current block once a block has been examined
     %edx   match mask of the last block that contained a match,
            0 if no match has been recorded yet (main loop only)
     %esi   value of %edi (end of block) that goes with %edx

   Masks come from pmovmskb on a dword compare, so each element owns
   four consecutive bits: bits 0-3 first element, 4-7 second,
   8-11 third, 12-15 fourth.

   NOTE(review): the bit tests assume the string starts on a 4-byte
   boundary -- confirm with callers.
  -----------------------------------------------------------------------*/

	.text
ENTRY (wcsrchr)

	ENTRANCE
	mov	STR1(%esp), %ecx
	movd	STR2(%esp), %xmm1

	mov	%ecx, %edi
	/* Two interleaves replicate the low dword into all four lanes.  */
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1

/* ECX has OFFSET. */
	/* An offset above 48 inside a 64-byte block means a 16-byte load
	   from the string start would run past that block; use the
	   aligned-load path instead.  */
	and	$63, %ecx
	cmp	$48, %ecx
	ja	L(crosscache)

/* unaligned string. */
	movdqu	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
/* Find where NULL is.  */
	pmovmskb %xmm2, %ecx
/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match1)

	test	%ecx, %ecx
	jnz	L(return_null)

	/* Neither a match nor the terminator in the first 16 bytes: round
	   the scan pointer down to a 16-byte boundary.  Part of the first
	   16 bytes may be scanned again.  */
	and	$-16, %edi

	PUSH	(%esi)

	/* No match recorded yet.  */
	xor	%edx, %edx
	jmp	L(loop)

	CFI_POP	(%esi)

	.p2align 4
L(unaligned_match1):
	test	%ecx, %ecx
	jnz	L(prolog_find_zero_1)

	PUSH	(%esi)

/* Save current match */
	mov	%eax, %edx
	mov	%edi, %esi
	and	$-16, %edi
	jmp	L(loop)

	CFI_POP	(%esi)

	.p2align 4
L(crosscache):
/* Handle unaligned string.  */
	/* Load the aligned block containing the string start and shift the
	   masks right by the start offset within that block, so that bit 0
	   corresponds to the first element of the string.  */
	and	$15, %ecx
	and	$-16, %edi
	pxor	%xmm3, %xmm3
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm3
	pcmpeqd	%xmm1, %xmm0
/* Find where NULL is.  */
	pmovmskb %xmm3, %edx
/* Check if there is a match.  */
	pmovmskb %xmm0, %eax
/* Remove the leading bytes.  */
	shr	%cl, %edx
	shr	%cl, %eax
	add	$16, %edi

	test	%eax, %eax
	jnz	L(unaligned_match)

	test	%edx, %edx
	jnz	L(return_null)

	PUSH	(%esi)

	xor	%edx, %edx
	jmp	L(loop)

	CFI_POP	(%esi)

	.p2align 4
L(unaligned_match):
	test	%edx, %edx
	jnz	L(prolog_find_zero)

	PUSH	(%esi)

	/* Record the match.  The mask was shifted by %ecx, so the matching
	   end-of-block pointer is shifted by the same amount.  */
	mov	%eax, %edx
	lea	(%edi, %ecx), %esi

/* Loop start on aligned string.  */
	/* Four aligned blocks per iteration.  %xmm2 is all-zero on entry to
	   every step, because a step that finds a zero element leaves the
	   loop.  %ecx = zero-mask | match-mask, %eax = match-mask.  */
	.p2align 4
L(loop):
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm0, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm3
	pcmpeqd	%xmm3, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm3, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm4
	pcmpeqd	%xmm4, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm4
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm4, %eax
	or	%eax, %ecx
	jnz	L(matches)

	movdqa	(%edi), %xmm5
	pcmpeqd	%xmm5, %xmm2
	add	$16, %edi
	pcmpeqd	%xmm1, %xmm5
	pmovmskb %xmm2, %ecx
	pmovmskb %xmm5, %eax
	or	%eax, %ecx
	jz	L(loop)

	.p2align 4
L(matches):
	test	%eax, %eax
	jnz	L(match)
	/* Terminator found with no usable match in the current block:
	   return the last recorded match, if any.  */
L(return_value):
	test	%edx, %edx
	jz	L(return_null_1)
	mov	%edx, %eax
	mov	%esi, %edi

	POP	(%esi)

	/* Pick the highest matching element of the recorded block.  */
	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	CFI_PUSH	(%esi)

	.p2align 4
L(return_null_1):
	POP	(%esi)

	xor	%eax, %eax
	RETURN

	CFI_PUSH	(%esi)

	.p2align 4
L(match):
	pmovmskb %xmm2, %ecx
	test	%ecx, %ecx
	jnz	L(find_zero)
/* save match info */
	mov	%eax, %edx
	mov	%edi, %esi
	jmp	L(loop)

	/* The current block holds both a match and the terminator.  Keep
	   only the match bits up to and including the first bit of the
	   terminating element; if none remain, fall back to the recorded
	   match.  */
	.p2align 4
L(find_zero):
	test	%cl, %cl
	jz	L(find_zero_in_third_or_fourth_wchar)
	test	$15, %cl
	jz	L(find_zero_in_second_wchar)
	and	$1, %eax
	jz	L(return_value)

	POP	(%esi)

	lea	-16(%edi), %eax
	RETURN

	CFI_PUSH	(%esi)

	.p2align 4
L(find_zero_in_second_wchar):
	and	$(1 << 5) - 1, %eax
	jz	L(return_value)

	POP	(%esi)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	CFI_PUSH	(%esi)

	.p2align 4
L(find_zero_in_third_or_fourth_wchar):
	test	$15, %ch
	jz	L(find_zero_in_fourth_wchar)
	and	$(1 << 9) - 1, %eax
	jz	L(return_value)

	POP	(%esi)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	CFI_PUSH	(%esi)

	/* Terminator is the fourth element, so every match bit in the block
	   is at or before it and no masking is needed.  %eax is known to be
	   non-zero here.  */
	.p2align 4
L(find_zero_in_fourth_wchar):

	POP	(%esi)

	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	/* No CFI_PUSH (%esi) from here to END: every label below is entered
	   either after POP (%esi) or from the prolog paths, which never push
	   %esi.  Only %edi is on the stack, which is the state RETURN leaves
	   behind.  */

	/* Shared tails: %edi points just past the block holding the match;
	   return the address of the selected element.  */
	.p2align 4
L(match_second_wchar):
	lea	-12(%edi), %eax
	RETURN

	.p2align 4
L(match_third_or_fourth_wchar):
	test	$15 << 4, %ah
	jnz	L(match_fourth_wchar)
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_third_wchar):
	lea	-8(%edi), %eax
	RETURN

	.p2align 4
L(match_fourth_wchar):
	lea	-4(%edi), %eax
	RETURN

	.p2align 4
L(return_null):
	xor	%eax, %eax
	RETURN

	/* Terminator and a match both lie in the very first block examined,
	   before the main loop (so %esi was never pushed).  Same bit logic
	   as L(find_zero), but with no earlier match to fall back on.  */
	.p2align 4
L(prolog_find_zero):
	/* Aligned-load path: turn %edi into string start + 16 to go with
	   the shifted masks, and move the zero mask into %ecx.  */
	add	%ecx, %edi
	mov	%edx, %ecx
L(prolog_find_zero_1):
	test	%cl, %cl
	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
	test	$15, %cl
	jz	L(prolog_find_zero_in_second_wchar)
	and	$1, %eax
	jz	L(return_null)

	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_second_wchar):
	and	$(1 << 5) - 1, %eax
	jz	L(return_null)

	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_third_or_fourth_wchar):
	test	$15, %ch
	jz	L(prolog_find_zero_in_fourth_wchar)
	and	$(1 << 9) - 1, %eax
	jz	L(return_null)

	test	%ah, %ah
	jnz	L(match_third_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

	.p2align 4
L(prolog_find_zero_in_fourth_wchar):
	test	%ah, %ah
	jnz	L(match_third_or_fourth_wchar)
	test	$15 << 4, %al
	jnz	L(match_second_wchar)
	lea	-16(%edi), %eax
	RETURN

END (wcsrchr)