/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Syntax: GNU as, AT&T operand order, run through the C preprocessor.
   Target: 32-bit x86 with SSE2; arguments are read from the stack.  */

/* ------------------------------------------------------------------ */
/* Fallback definitions, used only when the including build has not   */
/* already supplied its own.                                          */
/* ------------------------------------------------------------------ */

/* File-local label: the .L prefix keeps it out of the symbol table.  */
#ifndef L
# define L(label)       .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc  .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc    .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)       .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)               .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)     .cfi_adjust_cfa_offset off
#endif

/* Open a global, 16-byte aligned function and start its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)            \
        .type name, @function;  \
        .globl name;            \
        .p2align 4;             \
name:                           \
        cfi_startproc
#endif

/* Close the CFI region and record the function's size.  */
#ifndef END
# define END(name)              \
        cfi_endproc;            \
        .size name, .-name
#endif

/* Unwind bookkeeping that accompanies a 4-byte push / pop of REG.  */
#define CFI_PUSH(REG)                   \
        cfi_adjust_cfa_offset (4);      \
        cfi_rel_offset (REG, 0)

#define CFI_POP(REG)                    \
        cfi_adjust_cfa_offset (-4);     \
        cfi_restore (REG)

#define PUSH(REG)       pushl REG; CFI_PUSH (REG)
#define POP(REG)        popl REG; CFI_POP (REG)

/* Offset of the first argument from %esp once ENTRANCE has run:
   4 bytes of return address + 4 bytes of saved %edi.  */
#define PARMS           8
#define ENTRANCE        PUSH(%edi)
/* Restore %edi and return.  The trailing CFI_PUSH re-describes the
   "%edi is on the stack" state for whatever code follows the ret.  */
#define RETURN          POP (%edi); ret; CFI_PUSH (%edi);


#define STR1            PARMS
#define STR2            STR1+4

/* ------------------------------------------------------------------ */
/* char *strchr (const char *s, int c)                                */
/*                                                                    */
/* In:    STR1(%esp) = s, STR2(%esp) = c (only its low byte is used)  */
/* Out:   %eax = address of the first byte equal to c, or 0 if the    */
/*        terminating NUL is reached first.  A NUL that itself equals */
/*        c counts as a match.                                        */
/* Saved: %edi (pushed on entry, popped by every RETURN)              */
/* Clobb: %ecx, %edx, %xmm0, %xmm1, %xmm2, flags                      */
/*                                                                    */
/* Register roles                                                     */
/*   %edi   address of the 16-byte chunk being examined (or s itself  */
/*          when a match is found in the unaligned head chunk)        */
/*   %xmm1  c replicated into all 16 bytes                            */
/*   %xmm2  all zeroes on entry to every chunk test                   */
/*   %eax   bit i set <=> byte i of the chunk equals c                */
/*   %edx   bit i set <=> byte i of the chunk is NUL                  */
/*                                                                    */
/* Every memory read is a 16-byte aligned movdqa.                     */
/* L(ExitN) returns %edi + (N - 1).                                   */
/* ------------------------------------------------------------------ */

        .text
ENTRY (strchr)

        ENTRANCE
        mov     STR1(%esp), %ecx
        movd    STR2(%esp), %xmm1

        pxor    %xmm2, %xmm2
        mov     %ecx, %edi
        /* Two byte-unpacks plus the dword shuffle below spread the low
           byte of c across the whole of %xmm1.  */
        punpcklbw %xmm1, %xmm1
        punpcklbw %xmm1, %xmm1
        /* %ecx = s & 15, the misalignment of s.  */
        and     $15, %ecx
        /* pshufd leaves the flags alone, so the branch still sees the
           result of the and.  */
        pshufd  $0, %xmm1, %xmm1
        jz      L(aligned_loop)

        /* Unaligned start: examine the aligned chunk that contains s
           and discard the mask bits of the bytes in front of s.  */
        and     $-16, %edi
        movdqa  (%edi), %xmm0
        pcmpeqb %xmm0, %xmm2
        pcmpeqb %xmm1, %xmm0
        pmovmskb %xmm2, %edx            /* NUL mask */
        pmovmskb %xmm0, %eax            /* match mask */
        sarl    %cl, %edx
        sarl    %cl, %eax
        test    %eax, %eax
        jz      L(head_no_match)
        /* The masks are now relative to s, so make %edi = s too.  */
        add     %ecx, %edi
        test    %edx, %edx
        jz      L(match_no_nul)
        jmp     L(match_with_nul)

        .p2align 4
L(head_no_match):
        test    %edx, %edx
        jnz     L(return_null)

        /* %xmm2 may still hold compare hits for the bytes in front of
           s; clear it again before entering the loop.  */
        pxor    %xmm2, %xmm2
        add     $16, %edi

        .p2align 4
        /* Aligned main loop, unrolled four times.  %xmm2 is known to be
           zero again whenever a chunk falls through, because a chunk
           with any NUL byte leaves the loop.  */
L(aligned_loop):
        .rept   4
        movdqa  (%edi), %xmm0
        pcmpeqb %xmm0, %xmm2
        pcmpeqb %xmm1, %xmm0
        pmovmskb %xmm2, %edx
        pmovmskb %xmm0, %eax
        test    %eax, %eax
        jnz     L(found_match)
        test    %edx, %edx
        jnz     L(return_null)
        add     $16, %edi
        .endr
        jmp     L(aligned_loop)

L(found_match):
        /* At least one byte matches; is there a NUL in the chunk too?  */
        test    %edx, %edx
        jz      L(match_no_nul)

        /* The chunk holds both a match and a NUL.  Walk the bits from
           the lowest upwards, testing the match bit before the NUL bit
           at each position, so whichever occurs first decides.  */
        .p2align 4
L(match_with_nul):
        test    %al, %al
        jz      L(match_with_nul_high)

        mov     %al, %cl
        and     $15, %cl
        jnz     L(match_with_nul_0_3)

        /* First match is in bytes 4..7; a NUL in bytes 0..3 wins.  */
        mov     %dl, %ch
        and     $15, %ch
        jnz     L(return_null)

        test    $0x10, %al
        jnz     L(Exit5)
        test    $0x10, %dl
        jnz     L(return_null)
        test    $0x20, %al
        jnz     L(Exit6)
        test    $0x20, %dl
        jnz     L(return_null)
        test    $0x40, %al
        jnz     L(Exit7)
        test    $0x40, %dl
        jnz     L(return_null)
        /* Only bit 7 of the match mask can be left.  */
        lea     7(%edi), %eax
        RETURN

        .p2align 4
L(match_with_nul_0_3):
        test    $0x01, %al
        jnz     L(Exit1)
        test    $0x01, %dl
        jnz     L(return_null)
        test    $0x02, %al
        jnz     L(Exit2)
        test    $0x02, %dl
        jnz     L(return_null)
        test    $0x04, %al
        jnz     L(Exit3)
        test    $0x04, %dl
        jnz     L(return_null)
        /* Only bit 3 of the match mask can be left.  */
        lea     3(%edi), %eax
        RETURN

        .p2align 4
L(match_with_nul_high):
        /* No match in bytes 0..7, so any NUL there comes first.  */
        test    %dl, %dl
        jnz     L(return_null)

        mov     %ah, %cl
        and     $15, %cl
        jnz     L(match_with_nul_8_11)

        /* First match is in bytes 12..15; a NUL in bytes 8..11 wins.  */
        mov     %dh, %ch
        and     $15, %ch
        jnz     L(return_null)

        test    $0x10, %ah
        jnz     L(Exit13)
        test    $0x10, %dh
        jnz     L(return_null)
        test    $0x20, %ah
        jnz     L(Exit14)
        test    $0x20, %dh
        jnz     L(return_null)
        test    $0x40, %ah
        jnz     L(Exit15)
        test    $0x40, %dh
        jnz     L(return_null)
        /* Only bit 15 of the match mask can be left.  */
        lea     15(%edi), %eax
        RETURN

        .p2align 4
L(match_with_nul_8_11):
        test    $0x01, %ah
        jnz     L(Exit9)
        test    $0x01, %dh
        jnz     L(return_null)
        test    $0x02, %ah
        jnz     L(Exit10)
        test    $0x02, %dh
        jnz     L(return_null)
        test    $0x04, %ah
        jnz     L(Exit11)
        test    $0x04, %dh
        jnz     L(return_null)
        /* Only bit 11 of the match mask can be left.  */
        lea     11(%edi), %eax
        RETURN

        /* A match and no NUL in the chunk: return the lowest set bit of
           the match mask.  */
        .p2align 4
L(match_no_nul):
        test    %al, %al
        jz      L(match_no_nul_high)

        test    $0x01, %al
        jnz     L(Exit1)
        test    $0x02, %al
        jnz     L(Exit2)
        test    $0x04, %al
        jnz     L(Exit3)
        test    $0x08, %al
        jnz     L(Exit4)
        test    $0x10, %al
        jnz     L(Exit5)
        test    $0x20, %al
        jnz     L(Exit6)
        test    $0x40, %al
        jnz     L(Exit7)
        lea     7(%edi), %eax
        RETURN

        .p2align 4
L(match_no_nul_high):
        test    $0x01, %ah
        jnz     L(Exit9)
        test    $0x02, %ah
        jnz     L(Exit10)
        test    $0x04, %ah
        jnz     L(Exit11)
        test    $0x08, %ah
        jnz     L(Exit12)
        test    $0x10, %ah
        jnz     L(Exit13)
        test    $0x20, %ah
        jnz     L(Exit14)
        test    $0x40, %ah
        jnz     L(Exit15)
        lea     15(%edi), %eax
        RETURN

        /* Return stubs: L(ExitN) yields %edi + (N - 1).  */
        .p2align 4
L(Exit1):
        lea     (%edi), %eax
        RETURN

        .p2align 4
L(Exit2):
        lea     1(%edi), %eax
        RETURN

        .p2align 4
L(Exit3):
        lea     2(%edi), %eax
        RETURN

        .p2align 4
L(Exit4):
        lea     3(%edi), %eax
        RETURN

        .p2align 4
L(Exit5):
        lea     4(%edi), %eax
        RETURN

        .p2align 4
L(Exit6):
        lea     5(%edi), %eax
        RETURN

        .p2align 4
L(Exit7):
        lea     6(%edi), %eax
        RETURN

        .p2align 4
L(Exit9):
        lea     8(%edi), %eax
        RETURN

        .p2align 4
L(Exit10):
        lea     9(%edi), %eax
        RETURN

        .p2align 4
L(Exit11):
        lea     10(%edi), %eax
        RETURN

        .p2align 4
L(Exit12):
        lea     11(%edi), %eax
        RETURN

        .p2align 4
L(Exit13):
        lea     12(%edi), %eax
        RETURN

        .p2align 4
L(Exit14):
        lea     13(%edi), %eax
        RETURN

        .p2align 4
L(Exit15):
        lea     14(%edi), %eax
        RETURN

        /* The NUL terminator came before any match.  */
        .p2align 4
L(return_null):
        xor     %eax, %eax
        RETURN

END (strchr)