1 /* 2 Copyright (c) 2011 Intel Corporation 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 11 * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 15 * Neither the name of Intel Corporation nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/*
 * wcslen -- count the 4-byte elements that precede the first zero element.
 * C equivalent: size_t wcslen(const wchar_t *s).
 * Syntax: AT&T (GNU as), i386 + SSE2, run through the C preprocessor.
 *
 * Stand-alone build (USE_AS_WCSCAT not defined):
 *   In:    STR(%esp) = 4(%esp) = s
 *   Out:   %eax = element count
 *   Clobb: %ecx, %edx, %xmm0-%xmm3, %xmm6, flags
 *
 * Included build (USE_AS_WCSCAT defined):
 *   The macro definitions, ENTRY/END, the argument load and the very last
 *   RETURN are all compiled out.  The including file therefore has to
 *   provide L() and RETURN and must arrive here with s already in %edx.
 *   Every exit except exit_tail7 ends in RETURN; exit_tail7 leaves
 *   %eax = 7 and falls through into whatever follows the include.
 *
 * Outline:
 *   1. Test the first 8 elements one at a time.
 *   2. Test sixteen 16-byte aligned chunks, one at a time.
 *   3. Scan 64 bytes per iteration from a 64-byte aligned address.
 */

#ifndef USE_AS_WCSCAT

# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

# ifndef ENTRY
#  define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
# endif

# ifndef END
#  define END(name) \
	cfi_endproc; \
	.size name, .-name
# endif

# define PARMS	4
# define STR	PARMS
# define RETURN	ret

	.text
ENTRY (wcslen)
	mov	STR(%esp), %edx
#endif
	/*
	 * Phase 1: element-by-element test of s[0..7].
	 * The 'l' suffix is required: with an immediate and a memory operand
	 * there is no register from which the assembler could infer that a
	 * 32-bit compare is meant.
	 */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	/*
	 * Phase 2: sixteen aligned 16-byte chunks.
	 *   %eax = (s + 32) rounded down to 16 -> current chunk address
	 *   %ecx = s + 16                      -> bias used by L(exit)
	 * Rounding down may re-read elements already tested in phase 1; they
	 * are known to be non-zero, so they cannot produce a hit.
	 * %eax is advanced past the chunk *before* the branch, so at L(exit)
	 * %eax - %ecx is the byte offset of the matching chunk from s.
	 * A compare register whose mask came back 0 is all-zero again and is
	 * reused as the zero comparand in a later step.
	 */
	pxor	%xmm0, %xmm0

	lea	32(%edx), %eax
	lea	-16(%eax), %ecx
	and	$-16, %eax

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/*
	 * Phase 3: 64 bytes per iteration.  Rounding %eax down to a multiple
	 * of 64 can only step back over chunks already tested above.
	 */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/*
	 * Fold the four chunks into %xmm2 with a byte-wise unsigned minimum
	 * and compare the result against zero (%xmm3 is all-zero here).
	 * A zero element in any chunk always shows up in the minimum.  The
	 * reverse is not true: zero bytes coming from different chunks can
	 * line up into a zero element, so a hit is only a candidate.
	 */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/*
	 * Candidate found: re-test the four chunks individually.  %xmm0 and
	 * %xmm2 were overwritten by pminub, so chunks 0 and 2 are compared
	 * from memory; %xmm1 and %xmm6 still hold chunks 1 and 3.
	 * %eax already points 64 bytes past chunk 0, so %ecx is raised by 48
	 * and then lowered by 16 per step to keep the L(exit) arithmetic
	 * valid.  After four misses %ecx is back at its original value and
	 * %xmm3 is still all-zero, so the loop can simply be resumed.
	 */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	jmp	L(aligned_64_loop)

	/*
	 * Exit from the vector paths.
	 *   %eax - %ecx = byte offset of the matching chunk from s
	 *   %edx        = pmovmskb mask of that chunk; mask bits 0-3, 4-7,
	 *                 8-11 and 12-15 belong to elements 0, 1, 2 and 3
	 * The result is the chunk's element index plus the position of the
	 * lowest matching element inside the chunk.
	 */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax
	test	%dl, %dl
	jz	L(exit_high)

	/* Match in element 0 or 1: low nibble set means element 0.  */
	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_1)
	RETURN

	/* Match in element 2 or 3: low nibble of %dh set means element 2.  */
	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_3)
	add	$2, %eax
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

	.p2align 4
L(exit_3):
	add	$3, %eax
	RETURN

	/* Exits for phase 1: the length is the index of the zero element.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	RETURN

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	RETURN

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	RETURN

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	RETURN

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	RETURN

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	RETURN

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	/* With USE_AS_WCSCAT this path falls through past the include.  */
#ifndef USE_AS_WCSCAT
	RETURN

END (wcslen)
#endif