/*
 * sse2_strlen_atom -- string length using SSE2, 32-bit x86.
 *
 * Syntax:    AT&T (GNU as), passed through the C preprocessor.
 * In:        STR(%esp) (= 4(%esp) at entry) holds the string address s.
 * Out:       %eax = number of bytes that precede the first zero byte.
 * Clobbers:  %ecx, %edx, %xmm0-%xmm3, flags.
 * Preserved: %esi, %edi, %ebx, %ebp (pushed before the 64-byte loop uses
 *            them, popped on the way out).
 *
 * NOTE(review): presumably called from C as
 *     size_t sse2_strlen_atom(const char *s);
 * confirm against the prototype used by the callers.
 *
 * Strategy
 *   1. Bytes s[0..15] are tested one at a time.
 *   2. Sixteen 16-byte chunks, starting at the first 16-byte boundary
 *      above s, are tested with pcmpeqb/pmovmskb.
 *   3. The pointer is rounded down to a 64-byte boundary and a loop
 *      tests 64 bytes per iteration.
 *   4. L(exit) turns "chunk address + zero-byte bitmask" into a length.
 *
 * Every vector load uses an address that was masked to a 16-byte boundary,
 * as the memory-operand form of pcmpeqb requires.  Such a load may touch
 * bytes located after the terminator, but only inside the aligned 16-byte
 * chunk (64-byte block in the loop) whose first byte belongs to the string.
 */

#define STRLEN			sse2_strlen_atom

#ifndef L
# define L(label)		.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc		.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc		.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

/* Open a global, 16-byte aligned function and start its CFI region. */
#ifndef ENTRY
# define ENTRY(name)			\
	.type name, @function;		\
	.globl name;			\
	.p2align 4;			\
name:					\
	cfi_startproc
#endif

/* Close the CFI region and record the symbol size. */
#ifndef END
# define END(name)			\
	cfi_endproc;			\
	.size name, .-name
#endif

/* Unwind bookkeeping for a 4-byte push / pop of REG. */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offset of the first (only) argument from %esp at function entry. */
#define PARMS		4
#define STR		PARMS
#define ENTRANCE
#define RETURN		ret

/*
 * Stage 1 helper: leave through exit stub N when s[N] is zero.
 * Expects %edx = s and %eax = 0.
 */
#define CHECK_BYTE(N)			\
	cmpb	$0, N(%edx);		\
	jz	L(exit_tail##N)

/*
 * Stage 2 helper: compare the 16 bytes at (%eax) with the all-zero
 * register XMM, put the per-byte result mask in %edx, step %eax past the
 * chunk and leave through L(exit) when any byte was zero.  When no byte
 * matched, XMM ends up all-zero again and can be reused as is.  lea is
 * used for the pointer step because it leaves the flags of test intact.
 */
#define CHECK_CHUNK(XMM)		\
	pcmpeqb	(%eax), XMM;		\
	pmovmskb XMM, %edx;		\
	test	%edx, %edx;		\
	lea	16(%eax), %eax;		\
	jnz	L(exit)

/* Same as CHECK_CHUNK, additionally zeroing NEXT for the following probe. */
#define CHECK_CHUNK_CLEAR_NEXT(XMM, NEXT)	\
	pcmpeqb	(%eax), XMM;		\
	pmovmskb XMM, %edx;		\
	pxor	NEXT, NEXT;		\
	test	%edx, %edx;		\
	lea	16(%eax), %eax;		\
	jnz	L(exit)

/*
 * Exit helper: branch to exit stub k for the lowest set bit k (0..6) of
 * the 8-bit register REG8.  Falls through when none of bits 0..6 is set.
 */
#define BRANCH_ON_BITS_0_TO_6(REG8)	\
	test	$0x01, REG8;		\
	jnz	L(exit_tail0);		\
	test	$0x02, REG8;		\
	jnz	L(exit_tail1);		\
	test	$0x04, REG8;		\
	jnz	L(exit_tail2);		\
	test	$0x08, REG8;		\
	jnz	L(exit_tail3);		\
	test	$0x10, REG8;		\
	jnz	L(exit_tail4);		\
	test	$0x20, REG8;		\
	jnz	L(exit_tail5);		\
	test	$0x40, REG8;		\
	jnz	L(exit_tail6)

	.text
ENTRY (STRLEN)
	ENTRANCE
	mov	STR(%esp), %edx		/* %edx = s */
	xor	%eax, %eax		/* %eax = 0, exit stubs add N to it */

	/* Stage 1: probe s[0] .. s[15] individually. */
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	CHECK_BYTE (1)
	CHECK_BYTE (2)
	CHECK_BYTE (3)
	CHECK_BYTE (4)
	CHECK_BYTE (5)
	CHECK_BYTE (6)
	CHECK_BYTE (7)
	CHECK_BYTE (8)
	CHECK_BYTE (9)
	CHECK_BYTE (10)
	CHECK_BYTE (11)
	CHECK_BYTE (12)
	CHECK_BYTE (13)
	CHECK_BYTE (14)
	CHECK_BYTE (15)

	/*
	 * Stage 2: sixteen aligned 16-byte probes.
	 *   %eax = first 16-byte boundary strictly above s
	 *   %ecx = s + 16, subtracted again at L(exit)
	 */
	pxor	%xmm0, %xmm0
	mov	%edx, %eax
	mov	%edx, %ecx
	and	$-16, %eax
	add	$16, %ecx
	add	$16, %eax

	/* The first pass also zeroes %xmm1..%xmm3 for later use. */
	CHECK_CHUNK_CLEAR_NEXT (%xmm0, %xmm1)
	CHECK_CHUNK_CLEAR_NEXT (%xmm1, %xmm2)
	CHECK_CHUNK_CLEAR_NEXT (%xmm2, %xmm3)
	CHECK_CHUNK (%xmm3)

	CHECK_CHUNK (%xmm0)
	CHECK_CHUNK (%xmm1)
	CHECK_CHUNK (%xmm2)
	CHECK_CHUNK (%xmm3)

	CHECK_CHUNK (%xmm0)
	CHECK_CHUNK (%xmm1)
	CHECK_CHUNK (%xmm2)
	CHECK_CHUNK (%xmm3)

	CHECK_CHUNK (%xmm0)
	CHECK_CHUNK (%xmm1)
	CHECK_CHUNK (%xmm2)
	CHECK_CHUNK (%xmm3)

	/*
	 * Stage 3: 64 bytes per iteration.  Rounding down to a 64-byte
	 * boundary may revisit up to 48 bytes that were already found to
	 * be non-zero; that cannot change the result.
	 */
	and	$-0x40, %eax
	PUSH	(%esi)
	PUSH	(%edi)
	PUSH	(%ebx)
	PUSH	(%ebp)
	xor	%ebp, %ebp		/* running OR of the four masks */
L(aligned_64):
	pcmpeqb	(%eax), %xmm0
	pcmpeqb	16(%eax), %xmm1
	pcmpeqb	32(%eax), %xmm2
	pcmpeqb	48(%eax), %xmm3
	pmovmskb %xmm0, %edx		/* mask of chunk 0 */
	pmovmskb %xmm1, %esi		/* mask of chunk 1 */
	pmovmskb %xmm2, %edi		/* mask of chunk 2 */
	pmovmskb %xmm3, %ebx		/* mask of chunk 3 */
	or	%edx, %ebp
	or	%esi, %ebp
	or	%edi, %ebp
	or	%ebx, %ebp
	lea	64(%eax), %eax		/* lea keeps ZF from the last or */
	jz	L(aligned_64)		/* no zero byte in these 64 bytes */

	/*
	 * A zero byte was seen and %eax is 64 past the block start.  Pick
	 * the first chunk with a non-zero mask and set up what L(exit)
	 * expects: %edx = mask of that chunk, %eax = 16 past that chunk.
	 */
	test	%edx, %edx
	jnz	L(aligned_64_exit_16)
	test	%esi, %esi
	jnz	L(aligned_64_exit_32)
	test	%edi, %edi
	jnz	L(aligned_64_exit_48)
	mov	%ebx, %edx		/* chunk 3: %eax is already right */
	jmp	L(aligned_64_exit)
L(aligned_64_exit_48):
	lea	-16(%eax), %eax		/* chunk 2 */
	mov	%edi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_32):
	lea	-32(%eax), %eax		/* chunk 1 */
	mov	%esi, %edx
	jmp	L(aligned_64_exit)
L(aligned_64_exit_16):
	lea	-48(%eax), %eax		/* chunk 0: %edx holds its mask */
L(aligned_64_exit):
	POP	(%ebp)
	POP	(%ebx)
	POP	(%edi)
	POP	(%esi)

	/*
	 * Common exit for stages 2 and 3.
	 *   %eax = 16 past the chunk holding the first zero byte
	 *   %ecx = s + 16
	 *   %edx = non-zero 16-bit mask, bit k set when chunk byte k is zero
	 * After the subtraction %eax is the distance from s to the chunk;
	 * the index of the lowest set mask bit is then added to it.
	 */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)		/* first zero byte is in bytes 8..15 */
	BRANCH_ON_BITS_0_TO_6 (%dl)
	add	$7, %eax		/* only bit 7 of %dl is left */
L(exit_tail0):
	RETURN

L(exit_high):
	add	$8, %eax
	BRANCH_ON_BITS_0_TO_6 (%dh)
	add	$7, %eax		/* only bit 7 of %dh is left */
	RETURN

	/* Exit stubs: add the byte index N to %eax and return. */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
	RETURN

END (STRLEN)