Home | History | Annotate | Download | only in string
      1 #define STRLEN sse2_strlen_atom	/* symbol name emitted by ENTRY (STRLEN) / END (STRLEN) below */
      2 
      3 #ifndef L
        /*
         * L(label): paste the ".L" prefix onto label.  The GNU assembler treats
         * ".L"-prefixed names as local labels that are not exported, so every
         * branch target in this file stays private to the object.
         */
      4 # define L(label)	.L##label
      5 #endif
      6 
      7 #ifndef cfi_startproc
        /*
         * Wrappers for the assembler's .cfi_* call-frame-information directives.
         * Each wrapper is defined only when no definition is already in effect
         * (#ifndef) and expands to the directive of the same name, passing its
         * arguments through unchanged.
         */
      8 # define cfi_startproc			.cfi_startproc
      9 #endif
     10 
     11 #ifndef cfi_endproc
     12 # define cfi_endproc			.cfi_endproc
     13 #endif
     14 
     15 #ifndef cfi_rel_offset
     16 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     17 #endif
     18 
     19 #ifndef cfi_restore
     20 # define cfi_restore(reg)		.cfi_restore reg
     21 #endif
     22 
     23 #ifndef cfi_adjust_cfa_offset
     24 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     25 #endif
     26 
     27 #ifndef cfi_remember_state
        /* cfi_remember_state / cfi_restore_state are defined here but not used in this file. */
     28 # define cfi_remember_state		.cfi_remember_state
     29 #endif
     30 
     31 #ifndef cfi_restore_state
     32 # define cfi_restore_state		.cfi_restore_state
     33 #endif
     34 
     35 #ifndef ENTRY
        /*
         * ENTRY(name): open a function.  Marks name as a function-type symbol,
         * makes it global, aligns the entry point to 16 bytes (.p2align 4),
         * emits the label itself and starts a CFI region.  Must be paired with
         * END(name).
         */
     36 # define ENTRY(name)			\
     37 	.type name,  @function; 	\
     38 	.globl name;			\
     39 	.p2align 4;			\
     40 name:					\
     41 	cfi_startproc
     42 #endif
     43 
     44 #ifndef END
        /*
         * END(name): close the function opened by ENTRY(name).  Ends the CFI
         * region and records the symbol size as the distance from name to the
         * current location (.-name).
         */
     45 # define END(name)			\
     46 	cfi_endproc;			\
     47 	.size name, .-name
     48 #endif
     49 
        /*
         * CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG.  Grows the
         * recorded CFA offset by 4 and notes that REG is saved at offset 0
         * (.cfi_rel_offset REG, 0).
         */
     50 #define CFI_PUSH(REG)						\
     51   cfi_adjust_cfa_offset (4);					\
     52   cfi_rel_offset (REG, 0)
     53 
        /*
         * CFI_POP(REG): unwind bookkeeping for the matching pop.  Shrinks the
         * recorded CFA offset by 4 and marks REG as restored.
         */
     54 #define CFI_POP(REG)						\
     55   cfi_adjust_cfa_offset (-4);					\
     56   cfi_restore (REG)
     57 
     58 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)	/* push REG and record the push in the CFI */
     59 #define POP(REG)	popl REG; CFI_POP (REG)	/* pop REG and record the pop in the CFI */
     60 #define PARMS		4	/* %esp-relative offset of the first parameter at entry; presumably skips the 4-byte return address (confirm against the target ABI) */
     61 #define	STR		PARMS	/* %esp-relative offset of the string pointer argument */
     62 #define ENTRANCE	/* expands to nothing: no entry-time setup in this file */
     63 #define RETURN		ret	/* function return; expands to a plain ret */
     64 
     65 	.text
        /*
         * STRLEN (expands to sse2_strlen_atom)
         *
         * Returns in %eax the number of bytes that precede the first zero byte
         * of the byte string whose address is read from STR(%esp) = 4(%esp).
         *
         * In:     4(%esp) = s, address of the string (presumably the first
         *         stack argument of the 32-bit x86 C calling convention;
         *         confirm against the target ABI).
         * Out:    %eax = length.
         * Writes: %eax, %ecx, %edx, %xmm0-%xmm3, flags.
         *         %esi, %edi, %ebx, %ebp are pushed before the 64-byte loop
         *         and popped again before returning.
         *
         * Phases:
         *   1. Bytes s[0..15] are compared against zero one at a time.
         *   2. Starting at the first 16-byte boundary above s, sixteen aligned
         *      16-byte chunks are tested with pcmpeqb/pmovmskb (fully
         *      unrolled).
         *   3. The pointer is rounded down to a 64-byte boundary and a loop
         *      tests four 16-byte chunks (64 bytes) per iteration.
         *   4. L(exit) turns (chunk address, 16-bit zero-byte mask) into the
         *      length.
         *
         * Invariant used by L(exit): %eax points 16 bytes PAST the chunk whose
         * mask is in %edx, and %ecx = s + 16, so %eax - %ecx is the offset of
         * that chunk from s.
         */
     66 ENTRY (STRLEN)
     67 	ENTRANCE
     68 	mov	STR(%esp), %edx		/* edx = s */
     69 	xor	%eax, %eax		/* eax = 0, so each exit_tailN stub returns exactly N */
        	/* Phase 1: test s[0] .. s[15] individually. */
     70 	cmpb	$0, (%edx)
     71 	jz	L(exit_tail0)
     72 	cmpb	$0, 1(%edx)
     73 	jz	L(exit_tail1)
     74 	cmpb	$0, 2(%edx)
     75 	jz	L(exit_tail2)
     76 	cmpb	$0, 3(%edx)
     77 	jz	L(exit_tail3)
     78 	cmpb	$0, 4(%edx)
     79 	jz	L(exit_tail4)
     80 	cmpb	$0, 5(%edx)
     81 	jz	L(exit_tail5)
     82 	cmpb	$0, 6(%edx)
     83 	jz	L(exit_tail6)
     84 	cmpb	$0, 7(%edx)
     85 	jz	L(exit_tail7)
     86 	cmpb	$0, 8(%edx)
     87 	jz	L(exit_tail8)
     88 	cmpb	$0, 9(%edx)
     89 	jz	L(exit_tail9)
     90 	cmpb	$0, 10(%edx)
     91 	jz	L(exit_tail10)
     92 	cmpb	$0, 11(%edx)
     93 	jz	L(exit_tail11)
     94 	cmpb	$0, 12(%edx)
     95 	jz	L(exit_tail12)
     96 	cmpb	$0, 13(%edx)
     97 	jz	L(exit_tail13)
     98 	cmpb	$0, 14(%edx)
     99 	jz	L(exit_tail14)
    100 	cmpb	$0, 15(%edx)
    101 	jz	L(exit_tail15)
        	/*
        	 * Phase 2 setup.  From here on s[0..15] are known to be non-zero.
        	 * eax becomes the first 16-byte boundary strictly above s, so the
        	 * first chunk may overlap bytes already tested; those are non-zero
        	 * and cannot produce a false match.
        	 */
    102 	pxor	%xmm0, %xmm0		/* xmm0 = 0: comparand for the zero-byte search */
    103 	mov	%edx, %eax
    104 	mov	%edx, %ecx
    105 	and	$-16, %eax
    106 	add	$16, %ecx		/* ecx = s + 16, subtracted again at L(exit) */
    107 	add	$16, %eax		/* eax = (s & -16) + 16, a multiple of 16 */
    108 
        	/*
        	 * Unrolled chunk test, repeated 16 times (256 bytes).
        	 * pcmpeqb sets each byte of the xmm register to 0xFF where the
        	 * memory byte equals the (zero) register byte; pmovmskb collects
        	 * the top bit of each byte into a 16-bit mask in edx.  lea advances
        	 * eax without modifying the flags, so the jnz still acts on the
        	 * result of test.  When the mask is zero the compare result is
        	 * all-zero too, which is why xmm0-xmm3 can be reused later as
        	 * comparands without another pxor.
        	 */
    109 	pcmpeqb	(%eax), %xmm0
    110 	pmovmskb %xmm0, %edx
    111 	pxor	%xmm1, %xmm1		/* zero the comparand for the next chunk */
    112 	test	%edx, %edx
    113 	lea	16(%eax), %eax		/* eax = chunk + 16 (flags untouched) */
    114 	jnz	L(exit)
    115 
    116 	pcmpeqb	(%eax), %xmm1
    117 	pmovmskb %xmm1, %edx
    118 	pxor	%xmm2, %xmm2
    119 	test	%edx, %edx
    120 	lea	16(%eax), %eax
    121 	jnz	L(exit)
    122 
    123 
    124 	pcmpeqb	(%eax), %xmm2
    125 	pmovmskb %xmm2, %edx
    126 	pxor	%xmm3, %xmm3
    127 	test	%edx, %edx
    128 	lea	16(%eax), %eax
    129 	jnz	L(exit)
    130 
    131 	pcmpeqb	(%eax), %xmm3
    132 	pmovmskb %xmm3, %edx
    133 	test	%edx, %edx
    134 	lea	16(%eax), %eax
    135 	jnz	L(exit)
    136 
        	/* xmm0-xmm3 are all zero again here (no match so far); reuse them in rotation. */
    137 	pcmpeqb	(%eax), %xmm0
    138 	pmovmskb %xmm0, %edx
    139 	test	%edx, %edx
    140 	lea	16(%eax), %eax
    141 	jnz	L(exit)
    142 
    143 	pcmpeqb	(%eax), %xmm1
    144 	pmovmskb %xmm1, %edx
    145 	test	%edx, %edx
    146 	lea	16(%eax), %eax
    147 	jnz	L(exit)
    148 
    149 	pcmpeqb	(%eax), %xmm2
    150 	pmovmskb %xmm2, %edx
    151 	test	%edx, %edx
    152 	lea	16(%eax), %eax
    153 	jnz	L(exit)
    154 
    155 	pcmpeqb	(%eax), %xmm3
    156 	pmovmskb %xmm3, %edx
    157 	test	%edx, %edx
    158 	lea	16(%eax), %eax
    159 	jnz	L(exit)
    160 
    161 	pcmpeqb	(%eax), %xmm0
    162 	pmovmskb %xmm0, %edx
    163 	test	%edx, %edx
    164 	lea	16(%eax), %eax
    165 	jnz	L(exit)
    166 
    167 	pcmpeqb	(%eax), %xmm1
    168 	pmovmskb %xmm1, %edx
    169 	test	%edx, %edx
    170 	lea	16(%eax), %eax
    171 	jnz	L(exit)
    172 
    173 	pcmpeqb	(%eax), %xmm2
    174 	pmovmskb %xmm2, %edx
    175 	test	%edx, %edx
    176 	lea	16(%eax), %eax
    177 	jnz	L(exit)
    178 
    179 	pcmpeqb	(%eax), %xmm3
    180 	pmovmskb %xmm3, %edx
    181 	test	%edx, %edx
    182 	lea	16(%eax), %eax
    183 	jnz	L(exit)
    184 
    185 	pcmpeqb	(%eax), %xmm0
    186 	pmovmskb %xmm0, %edx
    187 	test	%edx, %edx
    188 	lea	16(%eax), %eax
    189 	jnz	L(exit)
    190 
    191 	pcmpeqb	(%eax), %xmm1
    192 	pmovmskb %xmm1, %edx
    193 	test	%edx, %edx
    194 	lea	16(%eax), %eax
    195 	jnz	L(exit)
    196 
    197 	pcmpeqb	(%eax), %xmm2
    198 	pmovmskb %xmm2, %edx
    199 	test	%edx, %edx
    200 	lea	16(%eax), %eax
    201 	jnz	L(exit)
    202 
    203 	pcmpeqb	(%eax), %xmm3
    204 	pmovmskb %xmm3, %edx
    205 	test	%edx, %edx
    206 	lea	16(%eax), %eax
    207 	jnz	L(exit)
    208 
        	/*
        	 * Phase 3 setup.  Round eax down to a 64-byte boundary; this steps
        	 * back at most 48 bytes, all inside chunks already found free of
        	 * zero bytes.  The four pushed registers hold the extra masks and
        	 * the OR accumulator inside the loop.
        	 */
    209 	and	$-0x40, %eax
    210 	PUSH (%esi)
    211 	PUSH (%edi)
    212 	PUSH (%ebx)
    213 	PUSH (%ebp)
    214 	xor	%ebp, %ebp		/* ebp = 0: OR accumulator of the four masks */
        /*
         * 64 bytes per iteration.  ebp is zero on entry and the loop only repeats
         * while it stays zero, so after the four ORs it is non-zero exactly when
         * one of this iteration's masks is.  The jz consumes the flags of the last
         * or; the lea in between does not modify flags.
         */
    215 L(aligned_64):
    216 	pcmpeqb	(%eax), %xmm0
    217 	pcmpeqb	16(%eax), %xmm1
    218 	pcmpeqb	32(%eax), %xmm2
    219 	pcmpeqb	48(%eax), %xmm3
    220 	pmovmskb %xmm0, %edx		/* edx = mask of bytes  0..15 */
    221 	pmovmskb %xmm1, %esi		/* esi = mask of bytes 16..31 */
    222 	pmovmskb %xmm2, %edi		/* edi = mask of bytes 32..47 */
    223 	pmovmskb %xmm3, %ebx		/* ebx = mask of bytes 48..63 */
    224 	or	%edx, %ebp
    225 	or	%esi, %ebp
    226 	or	%edi, %ebp
    227 	or	%ebx, %ebp
    228 	lea	64(%eax), %eax		/* eax = block + 64 (flags untouched) */
    229 	jz	L(aligned_64)
        /*
         * A zero byte lies in the block just tested.  Pick the first chunk with a
         * non-zero mask, move its mask into edx and set eax = chunk + 16 as
         * L(exit) expects.  (The L(48leave) label is not referenced anywhere in
         * this file.)
         */
    230 L(48leave):
    231 	test	%edx, %edx
    232 	jnz	L(aligned_64_exit_16)
    233 	test	%esi, %esi
    234 	jnz	L(aligned_64_exit_32)
    235 	test	%edi, %edi
    236 	jnz	L(aligned_64_exit_48)
    237 	mov	%ebx, %edx		/* match is in the fourth chunk (block + 48) */
    238 	lea	(%eax), %eax		/* leaves eax unchanged: block + 64 is already chunk + 16 */
    239 	jmp	L(aligned_64_exit)
    240 L(aligned_64_exit_48):
    241 	lea	-16(%eax), %eax		/* third chunk (block + 32): eax = block + 48 */
    242 	mov	%edi, %edx
    243 	jmp	L(aligned_64_exit)
    244 L(aligned_64_exit_32):
    245 	lea	-32(%eax), %eax		/* second chunk (block + 16): eax = block + 32 */
    246 	mov	%esi, %edx
    247 	jmp	L(aligned_64_exit)
    248 L(aligned_64_exit_16):
    249 	lea	-48(%eax), %eax		/* first chunk (block + 0): eax = block + 16; edx already holds its mask */
    250 L(aligned_64_exit):
        	/* Undo the four pushes in reverse order before joining the common exit. */
    251 	POP (%ebp)
    252 	POP (%ebx)
    253 	POP (%edi)
    254 	POP (%esi)
        /*
         * Common exit.  On entry eax = chunk + 16, ecx = s + 16 and edx is the
         * non-zero 16-bit mask of that chunk (bit i set when chunk byte i is zero).
         * After the sub, eax is the offset of the chunk from s; the index of the
         * lowest set mask bit is then added through the exit_tailN stubs.
         */
    255 L(exit):
    256 	sub	%ecx, %eax		/* eax = chunk - s */
    257 	test	%dl, %dl
    258 	jz	L(exit_high)		/* no zero byte among chunk bytes 0..7 */
    259 	test	$0x01, %dl
    260 	jnz	L(exit_tail0)
    261 
    262 	test	$0x02, %dl
    263 	jnz	L(exit_tail1)
    264 
    265 	test	$0x04, %dl
    266 	jnz	L(exit_tail2)
    267 
    268 	test	$0x08, %dl
    269 	jnz	L(exit_tail3)
    270 
    271 	test	$0x10, %dl
    272 	jnz	L(exit_tail4)
    273 
    274 	test	$0x20, %dl
    275 	jnz	L(exit_tail5)
    276 
    277 	test	$0x40, %dl
    278 	jnz	L(exit_tail6)
    279 	add	$7, %eax		/* dl != 0 and bits 0..6 clear, so bit 7 is the one set */
    280 L(exit_tail0):
    281 	RETURN				/* also the "add 0" stub: eax is already the length */
    282 
        /* Low mask byte was zero: the first zero byte is at chunk index 8..15. */
    283 L(exit_high):
    284 	add	$8, %eax
    285 	test	$0x01, %dh
    286 	jnz	L(exit_tail0)
    287 
    288 	test	$0x02, %dh
    289 	jnz	L(exit_tail1)
    290 
    291 	test	$0x04, %dh
    292 	jnz	L(exit_tail2)
    293 
    294 	test	$0x08, %dh
    295 	jnz	L(exit_tail3)
    296 
    297 	test	$0x10, %dh
    298 	jnz	L(exit_tail4)
    299 
    300 	test	$0x20, %dh
    301 	jnz	L(exit_tail5)
    302 
    303 	test	$0x40, %dh
    304 	jnz	L(exit_tail6)
    305 	add	$7, %eax		/* only bit 7 of dh is left: index 15 */
    306 	RETURN
    307 
        /*
         * exit_tailN stubs: add N to eax and return.  They are reached with
         * eax == 0 from the byte-wise prologue (N = 0..15) and with eax holding
         * the chunk offset (plus 8 on the exit_high path) from the mask decode
         * (N = 0..6).  The first stub is aligned to 16 bytes.
         */
    308 	.p2align 4
    309 L(exit_tail1):
    310 	add	$1, %eax
    311 	RETURN
    312 
    313 L(exit_tail2):
    314 	add	$2, %eax
    315 	RETURN
    316 
    317 L(exit_tail3):
    318 	add	$3, %eax
    319 	RETURN
    320 
    321 L(exit_tail4):
    322 	add	$4, %eax
    323 	RETURN
    324 
    325 L(exit_tail5):
    326 	add	$5, %eax
    327 	RETURN
    328 
    329 L(exit_tail6):
    330 	add	$6, %eax
    331 	RETURN
    332 
    333 L(exit_tail7):
    334 	add	$7, %eax
    335 	RETURN
    336 
    337 L(exit_tail8):
    338 	add	$8, %eax
    339 	RETURN
    340 
    341 L(exit_tail9):
    342 	add	$9, %eax
    343 	RETURN
    344 
    345 L(exit_tail10):
    346 	add	$10, %eax
    347 	RETURN
    348 
    349 L(exit_tail11):
    350 	add	$11, %eax
    351 	RETURN
    352 
    353 L(exit_tail12):
    354 	add	$12, %eax
    355 	RETURN
    356 
    357 L(exit_tail13):
    358 	add	$13, %eax
    359 	RETURN
    360 
    361 L(exit_tail14):
    362 	add	$14, %eax
    363 	RETURN
    364 
    365 L(exit_tail15):
    366 	add	$15, %eax
    367 	ret				/* written as a plain ret; same instruction RETURN expands to in this file */
    368 
    369 END (STRLEN)
    370