1 /* 2 Copyright (c) 2014, Intel Corporation 3 All rights reserved. 4 5 Redistribution and use in source and binary forms, with or without 6 modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 11 * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 15 * Neither the name of Intel Corporation nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/*
 * STRLEN (default name: strlen) -- SSE2 string-length routine.
 * GNU as, AT&T operand order; the file is run through the C preprocessor.
 *
 * In:     %rdi = address of the NUL-terminated string s
 * Out:    %rax = number of bytes before the first NUL byte
 * Writes: %rcx, %rdx, %r10, %xmm0-%xmm5, flags; the stack is not touched
 *
 * Outline:
 *   1. Examine the first 16 bytes.  If (s & 63) <= 48 an unaligned load
 *      from s stays inside one 64-byte aligned block; otherwise the
 *      enclosing 16-byte aligned block is loaded and the bytes in front
 *      of s are masked out of the result.
 *   2. Probe sixteen aligned 16-byte blocks one at a time (four unrolled
 *      groups of four).
 *   3. Advance in 16-byte steps until %rax is a multiple of 64.
 *   4. Scan 64 bytes per iteration, folding four blocks with pminub, then
 *      locate the NUL inside the 64-byte block that ended the loop.
 *
 * With USE_AS_STRCAT defined only the instruction stream is emitted: no
 * macro definitions, no .section/ENTRY and no final RETURN/END, so the
 * L(exit64) path runs off the end of this file.  L() and RETURN are not
 * defined here in that mode and have to be provided by the including file.
 */

#ifndef USE_AS_STRCAT

#ifndef STRLEN
# define STRLEN strlen
#endif

#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif
#define RETURN ret
	.section .text.sse2,"ax",@progbits
ENTRY (STRLEN)
/* end ifndef USE_AS_STRCAT */
#endif
	xor	%eax, %eax		/* rax = 0; a 32-bit write clears the upper half too */
	mov	%edi, %ecx
	and	$0x3f, %ecx		/* rcx = s & 63 */
	pxor	%xmm0, %xmm0		/* xmm0 = 16 zero bytes to compare against */
	cmp	$0x30, %ecx
	ja	L(next)			/* (s & 63) > 48: take the aligned-load path */

	/* (s & 63) <= 48: the 16 bytes at s lie inside one 64-byte aligned block. */
	movdqu	(%rdi), %xmm1
	pcmpeqb	%xmm1, %xmm0
	pmovmskb %xmm0, %edx		/* bit i of edx set <=> s[i] == 0 */
	test	%edx, %edx
	jnz	L(exit_less16)		/* rax is still 0, so the result is bsf(edx) */
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to a multiple of 16 */
	jmp	L(align16_start)

L(next):
	mov	%rdi, %rax
	and	$-16, %rax		/* rax = s rounded down to a multiple of 16 */
	pcmpeqb	(%rax), %xmm0		/* compare the aligned block that contains s */
	mov	$-1, %r10d
	sub	%rax, %rcx		/* the low six bits of rcx are now s & 15 */
	shl	%cl, %r10d		/* 32-bit shift uses cl & 31: r10d = -1 << (s & 15) */
	pmovmskb %xmm0, %edx
	and	%r10d, %edx		/* drop matches located before s */
	jnz	L(exit)

	/*
	 * No NUL below rax + 16.  xmm0-xmm3 are cleared once; a pcmpeqb that
	 * finds no zero byte leaves its destination all-zero again, which is
	 * what lets the registers be reused without re-clearing them.
	 */
L(align16_start):
	pxor	%xmm0, %xmm0
	pxor	%xmm1, %xmm1
	pxor	%xmm2, %xmm2
	pxor	%xmm3, %xmm3

	/* Group 1: blocks at rax+16 .. rax+64. */
	pcmpeqb	16(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Group 2: rax advances by 64, so the block just compared is at rax+16. */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Group 3. */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/* Group 4. */
	pcmpeqb	80(%rax), %xmm0
	add	$64, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	jnz	L(exit64)

	/*
	 * Everything below rax + 80 is NUL-free and rax is a multiple of 16.
	 * Move forward in 16-byte steps until rax is a multiple of 64.  rax is
	 * tested at residues r, r+16 and r+32 (mod 64); if none of them is 0
	 * then r was 16 and the step after the third probe lands on r+48 == 0,
	 * so falling out of the third probe always leaves rax 64-byte aligned
	 * and no further test is required.
	 */
	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	80(%rax), %xmm0
	add	$80, %rax		/* rax = address of the block just compared */
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm1
	add	$16, %rax
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit)

	test	$0x3f, %rax
	jz	L(align64_loop)

	pcmpeqb	16(%rax), %xmm2
	add	$16, %rax
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit)

	/*
	 * Main loop: rax is 64-byte aligned, so the movaps/pminub memory
	 * operands are aligned.  The unsigned byte minimum of the four blocks
	 * has a zero byte iff at least one block has a zero byte in that
	 * position.  xmm0 is all-zero on every path that arrives here.
	 */
	.p2align 4
L(align64_loop):
	movaps	(%rax), %xmm4
	pminub	16(%rax), %xmm4
	movaps	32(%rax), %xmm5
	pminub	48(%rax), %xmm5
	add	$64, %rax
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm0, %xmm5
	pmovmskb %xmm5, %edx
	test	%edx, %edx
	jz	L(align64_loop)

	/*
	 * A NUL lies in the 64 bytes starting at rax - 64.  Re-examine the
	 * four blocks separately; rax is biased by -80 so that the shared
	 * exit16/32/48 stubs add back the matching block offset.
	 */
	pcmpeqb	-64(%rax), %xmm0
	sub	$80, %rax
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	jnz	L(exit16)

	pcmpeqb	32(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	jnz	L(exit32)

	pcmpeqb	48(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	jnz	L(exit48)

	/* The first three blocks are clean, so the NUL is in the fourth one. */
	pcmpeqb	64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
	RETURN

	/*
	 * Exit stubs.  edx holds a non-zero match mask (upper half of rdx is
	 * zero after pmovmskb).  L(exit): rax is the address of the matching
	 * block.  L(exit_less16): rax already holds an offset from s.
	 * L(exitNN): the matching block is at rax + NN.
	 */
	.p2align 4
L(exit):
	sub	%rdi, %rax		/* block address -> offset from s */
L(exit_less16):
	bsf	%rdx, %rdx		/* index of the first NUL inside the block */
	add	%rdx, %rax
	RETURN
	.p2align 4
L(exit16):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$16, %rax
	RETURN
	.p2align 4
L(exit32):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$32, %rax
	RETURN
	.p2align 4
L(exit48):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$48, %rax
	RETURN
	.p2align 4
L(exit64):
	sub	%rdi, %rax
	bsf	%rdx, %rdx
	add	%rdx, %rax
	add	$64, %rax
#ifndef USE_AS_STRCAT
	RETURN

END (STRLEN)
#endif