Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef ALIGN
     36 # define ALIGN(n)	.p2align n
     37 #endif
     38 
     39 #ifndef cfi_startproc
     40 # define cfi_startproc			.cfi_startproc
     41 #endif
     42 
     43 #ifndef cfi_endproc
     44 # define cfi_endproc			.cfi_endproc
     45 #endif
     46 
     47 #ifndef cfi_rel_offset
     48 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     49 #endif
     50 
     51 #ifndef cfi_restore
     52 # define cfi_restore(reg)		.cfi_restore (reg)
     53 #endif
     54 
     55 #ifndef cfi_adjust_cfa_offset
     56 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     57 #endif
     58 
     59 #ifndef ENTRY
     60 # define ENTRY(name)			\
     61 	.type name,  @function; 	\
     62 	.globl name;			\
     63 	.p2align 4;			\
     64 name:					\
     65 	cfi_startproc
     66 #endif
     67 
     68 #ifndef END
     69 # define END(name)			\
     70 	cfi_endproc;			\
     71 	.size name, .-name
     72 #endif
     73 
     74 #define CFI_PUSH(REG)						\
     75   cfi_adjust_cfa_offset (4);					\
     76   cfi_rel_offset (REG, 0)
     77 
     78 #define CFI_POP(REG)						\
     79   cfi_adjust_cfa_offset (-4);					\
     80   cfi_restore (REG)
     81 
     82 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     83 #define POP(REG)	popl REG; CFI_POP (REG)
     84 
     85 #ifdef USE_AS_BZERO
     86 # define DEST		PARMS
     87 # define LEN		DEST+4
     88 # define SETRTNVAL
     89 #else
     90 # define DEST		PARMS
     91 # define CHR		DEST+4
     92 # define LEN		CHR+4
     93 # define SETRTNVAL	movl DEST(%esp), %eax
     94 #endif
     95 
     96 #ifdef SHARED
     97 # define ENTRANCE	PUSH (%ebx);
     98 # define RETURN_END	POP (%ebx); ret
     99 # define RETURN		RETURN_END; CFI_PUSH (%ebx)
    100 # define PARMS		8		/* Preserve EBX.  */
    101 # define JMPTBL(I, B)	I - B
    102 
    103 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
    104    jump table with relative offsets.   */
    105 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    106     /* We first load PC into EBX.  */				\
    107     call	__i686.get_pc_thunk.bx;				\
    108     /* Get the address of the jump table.  */			\
    109     add		$(TABLE - .), %ebx;				\
    110     /* Get the entry and convert the relative offset to the	\
    111        absolute address.  */					\
    112     add		(%ebx,%ecx,4), %ebx;				\
    113     add		%ecx, %edx;					\
    114     /* We loaded the jump table and adjuested EDX. Go.  */	\
    115     jmp		*%ebx
    116 
    117 	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
    118 	.globl	__i686.get_pc_thunk.bx
    119 	.hidden	__i686.get_pc_thunk.bx
    120 	ALIGN (4)
    121 	.type	__i686.get_pc_thunk.bx,@function
    122 __i686.get_pc_thunk.bx:
    123 	movl	(%esp), %ebx
    124 	ret
    125 #else
    126 # define ENTRANCE
    127 # define RETURN_END	ret
    128 # define RETURN		RETURN_END
    129 # define PARMS		4
    130 # define JMPTBL(I, B)	I
    131 
    132 /* Branch to an entry in a jump table.  TABLE is a jump table with
    133    absolute offsets.  */
    134 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    135     add		%ecx, %edx;					\
    136     jmp		*TABLE(,%ecx,4)
    137 #endif
    138 
    139 	.section .text.sse2,"ax",@progbits
    140 	ALIGN (4)
    141 ENTRY (sse2_memset5_atom)
    142 	ENTRANCE
    143 
    144 	movl	LEN(%esp), %ecx
    145 #ifdef USE_AS_BZERO
    146 	xor	%eax, %eax
    147 #else
    148 	movzbl	CHR(%esp), %eax
    149 	movb	%al, %ah
    150 	/* Fill the whole EAX with pattern.  */
    151 	movl	%eax, %edx
    152 	shl	$16, %eax
    153 	or	%edx, %eax
    154 #endif
    155 	movl	DEST(%esp), %edx
    156 	cmp	$32, %ecx
    157 	jae	L(32bytesormore)
    158 
    159 L(write_less32bytes):
    160 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
    161 
    162 
    163 	.pushsection .rodata.sse2,"a",@progbits
    164 	ALIGN (2)
    165 L(table_less_32bytes):
    166 	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
    167 	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
    168 	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
    169 	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
    170 	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
    171 	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
    172 	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
    173 	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
    174 	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
    175 	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
    176 	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
    177 	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
    178 	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
    179 	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
    180 	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
    181 	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
    182 	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
    183 	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
    184 	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
    185 	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
    186 	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
    187 	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
    188 	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
    189 	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
    190 	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
    191 	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
    192 	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
    193 	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
    194 	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
    195 	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
    196 	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
    197 	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
    198 	.popsection
    199 
    200 	ALIGN (4)
    201 L(write_28bytes):
    202 	movl	%eax, -28(%edx)
    203 L(write_24bytes):
    204 	movl	%eax, -24(%edx)
    205 L(write_20bytes):
    206 	movl	%eax, -20(%edx)
    207 L(write_16bytes):
    208 	movl	%eax, -16(%edx)
    209 L(write_12bytes):
    210 	movl	%eax, -12(%edx)
    211 L(write_8bytes):
    212 	movl	%eax, -8(%edx)
    213 L(write_4bytes):
    214 	movl	%eax, -4(%edx)
    215 L(write_0bytes):
    216 	SETRTNVAL
    217 	RETURN
    218 
    219 	ALIGN (4)
    220 L(write_29bytes):
    221 	movl	%eax, -29(%edx)
    222 L(write_25bytes):
    223 	movl	%eax, -25(%edx)
    224 L(write_21bytes):
    225 	movl	%eax, -21(%edx)
    226 L(write_17bytes):
    227 	movl	%eax, -17(%edx)
    228 L(write_13bytes):
    229 	movl	%eax, -13(%edx)
    230 L(write_9bytes):
    231 	movl	%eax, -9(%edx)
    232 L(write_5bytes):
    233 	movl	%eax, -5(%edx)
    234 L(write_1bytes):
    235 	movb	%al, -1(%edx)
    236 	SETRTNVAL
    237 	RETURN
    238 
    239 	ALIGN (4)
    240 L(write_30bytes):
    241 	movl	%eax, -30(%edx)
    242 L(write_26bytes):
    243 	movl	%eax, -26(%edx)
    244 L(write_22bytes):
    245 	movl	%eax, -22(%edx)
    246 L(write_18bytes):
    247 	movl	%eax, -18(%edx)
    248 L(write_14bytes):
    249 	movl	%eax, -14(%edx)
    250 L(write_10bytes):
    251 	movl	%eax, -10(%edx)
    252 L(write_6bytes):
    253 	movl	%eax, -6(%edx)
    254 L(write_2bytes):
    255 	movw	%ax, -2(%edx)
    256 	SETRTNVAL
    257 	RETURN
    258 
    259 	ALIGN (4)
    260 L(write_31bytes):
    261 	movl	%eax, -31(%edx)
    262 L(write_27bytes):
    263 	movl	%eax, -27(%edx)
    264 L(write_23bytes):
    265 	movl	%eax, -23(%edx)
    266 L(write_19bytes):
    267 	movl	%eax, -19(%edx)
    268 L(write_15bytes):
    269 	movl	%eax, -15(%edx)
    270 L(write_11bytes):
    271 	movl	%eax, -11(%edx)
    272 L(write_7bytes):
    273 	movl	%eax, -7(%edx)
    274 L(write_3bytes):
    275 	movw	%ax, -3(%edx)
    276 	movb	%al, -1(%edx)
    277 	SETRTNVAL
    278 	RETURN
    279 
    280 	ALIGN (4)
    281 /* ECX > 32 and EDX is 4 byte aligned.  */
    282 L(32bytesormore):
    283 	/* Fill xmm0 with the pattern.  */
    284 #ifdef USE_AS_BZERO
    285 	pxor	%xmm0, %xmm0
    286 #else
    287 	movd	%eax, %xmm0
    288 	punpcklbw %xmm0, %xmm0
    289 	pshufd	$0, %xmm0, %xmm0
    290 #endif
    291 	testl	$0xf, %edx
    292 	jz	L(aligned_16)
    293 /* ECX > 32 and EDX is not 16 byte aligned.  */
    294 L(not_aligned_16):
    295 	movdqu	%xmm0, (%edx)
    296 	movl	%edx, %eax
    297 	and	$-16, %edx
    298 	add	$16, %edx
    299 	sub	%edx, %eax
    300 	add	%eax, %ecx
    301 	movd	%xmm0, %eax
    302 
    303 	ALIGN (4)
    304 L(aligned_16):
    305 	cmp	$128, %ecx
    306 	jae	L(128bytesormore)
    307 
    308 L(aligned_16_less128bytes):
    309 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    310 
    311 	ALIGN (4)
    312 L(128bytesormore):
    313 #ifdef SHARED_CACHE_SIZE
    314 	PUSH (%ebx)
    315 	mov	$SHARED_CACHE_SIZE, %ebx
    316 #else
    317 # ifdef SHARED
    318 	call	__i686.get_pc_thunk.bx
    319 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
    320 	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
    321 # else
    322 	PUSH (%ebx)
    323 	mov	__x86_shared_cache_size, %ebx
    324 # endif
    325 #endif
    326 	cmp	%ebx, %ecx
    327 	jae	L(128bytesormore_nt_start)
    328 
    329 
    330 #ifdef DATA_CACHE_SIZE
    331 	POP (%ebx)
    332 	cmp	$DATA_CACHE_SIZE, %ecx
    333 #else
    334 # ifdef SHARED
    335 	call	__i686.get_pc_thunk.bx
    336 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
    337 	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
    338 # else
    339 	POP (%ebx)
    340 	cmp	__x86_data_cache_size, %ecx
    341 # endif
    342 #endif
    343 
    344 	jae	L(128bytes_L2_normal)
    345 	subl	$128, %ecx
    346 L(128bytesormore_normal):
    347 	sub	$128, %ecx
    348 	movdqa	%xmm0, (%edx)
    349 	movdqa	%xmm0, 0x10(%edx)
    350 	movdqa	%xmm0, 0x20(%edx)
    351 	movdqa	%xmm0, 0x30(%edx)
    352 	movdqa	%xmm0, 0x40(%edx)
    353 	movdqa	%xmm0, 0x50(%edx)
    354 	movdqa	%xmm0, 0x60(%edx)
    355 	movdqa	%xmm0, 0x70(%edx)
    356 	lea	128(%edx), %edx
    357 	jb	L(128bytesless_normal)
    358 
    359 
    360 	sub	$128, %ecx
    361 	movdqa	%xmm0, (%edx)
    362 	movdqa	%xmm0, 0x10(%edx)
    363 	movdqa	%xmm0, 0x20(%edx)
    364 	movdqa	%xmm0, 0x30(%edx)
    365 	movdqa	%xmm0, 0x40(%edx)
    366 	movdqa	%xmm0, 0x50(%edx)
    367 	movdqa	%xmm0, 0x60(%edx)
    368 	movdqa	%xmm0, 0x70(%edx)
    369 	lea	128(%edx), %edx
    370 	jae	L(128bytesormore_normal)
    371 
    372 L(128bytesless_normal):
    373 	lea	128(%ecx), %ecx
    374 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    375 
    376 	ALIGN (4)
    377 L(128bytes_L2_normal):
    378 	prefetcht0	0x380(%edx)
    379 	prefetcht0	0x3c0(%edx)
    380 	sub	$128, %ecx
    381 	movdqa	%xmm0, (%edx)
    382 	movaps	%xmm0, 0x10(%edx)
    383 	movaps	%xmm0, 0x20(%edx)
    384 	movaps	%xmm0, 0x30(%edx)
    385 	movaps	%xmm0, 0x40(%edx)
    386 	movaps	%xmm0, 0x50(%edx)
    387 	movaps	%xmm0, 0x60(%edx)
    388 	movaps	%xmm0, 0x70(%edx)
    389 	add	$128, %edx
    390 	cmp	$128, %ecx
    391 	jae	L(128bytes_L2_normal)
    392 
    393 L(128bytesless_L2_normal):
    394 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    395 
    396 L(128bytesormore_nt_start):
    397 	sub	%ebx, %ecx
    398 	ALIGN (4)
    399 L(128bytesormore_shared_cache_loop):
    400 	prefetcht0	0x3c0(%edx)
    401 	prefetcht0	0x380(%edx)
    402 	sub	$0x80, %ebx
    403 	movdqa	%xmm0, (%edx)
    404 	movdqa	%xmm0, 0x10(%edx)
    405 	movdqa	%xmm0, 0x20(%edx)
    406 	movdqa	%xmm0, 0x30(%edx)
    407 	movdqa	%xmm0, 0x40(%edx)
    408 	movdqa	%xmm0, 0x50(%edx)
    409 	movdqa	%xmm0, 0x60(%edx)
    410 	movdqa	%xmm0, 0x70(%edx)
    411 	add	$0x80, %edx
    412 	cmp	$0x80, %ebx
    413 	jae	L(128bytesormore_shared_cache_loop)
    414 	cmp	$0x80, %ecx
    415 	jb	L(shared_cache_loop_end)
    416 	ALIGN (4)
    417 L(128bytesormore_nt):
    418 	sub	$0x80, %ecx
    419 	movntdq	%xmm0, (%edx)
    420 	movntdq	%xmm0, 0x10(%edx)
    421 	movntdq	%xmm0, 0x20(%edx)
    422 	movntdq	%xmm0, 0x30(%edx)
    423 	movntdq	%xmm0, 0x40(%edx)
    424 	movntdq	%xmm0, 0x50(%edx)
    425 	movntdq	%xmm0, 0x60(%edx)
    426 	movntdq	%xmm0, 0x70(%edx)
    427 	add	$0x80, %edx
    428 	cmp	$0x80, %ecx
    429 	jae	L(128bytesormore_nt)
    430 	sfence
    431 L(shared_cache_loop_end):
    432 #if defined DATA_CACHE_SIZE || !defined SHARED
    433 	POP (%ebx)
    434 #endif
    435 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    436 
    437 
    438 	.pushsection .rodata.sse2,"a",@progbits
    439 	ALIGN (2)
    440 L(table_16_128bytes):
    441 	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
    442 	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
    443 	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
    444 	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
    445 	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
    446 	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
    447 	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
    448 	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
    449 	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
    450 	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
    451 	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
    452 	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
    453 	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
    454 	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
    455 	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
    456 	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
    457 	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
    458 	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
    459 	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
    460 	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
    461 	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
    462 	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
    463 	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
    464 	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
    465 	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
    466 	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
    467 	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
    468 	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
    469 	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
    470 	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
    471 	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
    472 	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
    473 	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
    474 	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
    475 	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
    476 	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
    477 	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
    478 	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
    479 	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
    480 	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
    481 	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
    482 	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
    483 	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
    484 	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
    485 	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
    486 	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
    487 	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
    488 	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
    489 	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
    490 	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
    491 	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
    492 	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
    493 	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
    494 	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
    495 	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
    496 	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
    497 	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
    498 	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
    499 	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
    500 	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
    501 	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
    502 	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
    503 	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
    504 	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
    505 	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
    506 	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
    507 	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
    508 	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
    509 	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
    510 	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
    511 	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
    512 	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
    513 	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
    514 	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
    515 	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
    516 	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
    517 	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
    518 	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
    519 	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
    520 	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
    521 	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
    522 	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
    523 	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
    524 	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
    525 	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
    526 	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
    527 	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
    528 	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
    529 	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
    530 	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
    531 	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
    532 	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
    533 	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
    534 	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
    535 	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
    536 	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
    537 	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
    538 	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
    539 	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
    540 	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
    541 	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
    542 	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
    543 	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
    544 	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
    545 	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
    546 	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
    547 	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
    548 	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
    549 	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
    550 	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
    551 	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
    552 	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
    553 	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
    554 	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
    555 	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
    556 	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
    557 	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
    558 	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
    559 	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
    560 	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
    561 	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
    562 	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
    563 	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
    564 	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
    565 	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
    566 	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
    567 	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
    568 	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
    569 	.popsection
    570 
    571 	ALIGN (4)
    572 L(aligned_16_112bytes):
    573 	movdqa	%xmm0, -112(%edx)
    574 L(aligned_16_96bytes):
    575 	movdqa	%xmm0, -96(%edx)
    576 L(aligned_16_80bytes):
    577 	movdqa	%xmm0, -80(%edx)
    578 L(aligned_16_64bytes):
    579 	movdqa	%xmm0, -64(%edx)
    580 L(aligned_16_48bytes):
    581 	movdqa	%xmm0, -48(%edx)
    582 L(aligned_16_32bytes):
    583 	movdqa	%xmm0, -32(%edx)
    584 L(aligned_16_16bytes):
    585 	movdqa	%xmm0, -16(%edx)
    586 L(aligned_16_0bytes):
    587 	SETRTNVAL
    588 	RETURN
    589 
    590 	ALIGN (4)
    591 L(aligned_16_113bytes):
    592 	movdqa	%xmm0, -113(%edx)
    593 L(aligned_16_97bytes):
    594 	movdqa	%xmm0, -97(%edx)
    595 L(aligned_16_81bytes):
    596 	movdqa	%xmm0, -81(%edx)
    597 L(aligned_16_65bytes):
    598 	movdqa	%xmm0, -65(%edx)
    599 L(aligned_16_49bytes):
    600 	movdqa	%xmm0, -49(%edx)
    601 L(aligned_16_33bytes):
    602 	movdqa	%xmm0, -33(%edx)
    603 L(aligned_16_17bytes):
    604 	movdqa	%xmm0, -17(%edx)
    605 L(aligned_16_1bytes):
    606 	movb	%al, -1(%edx)
    607 	SETRTNVAL
    608 	RETURN
    609 
    610 	ALIGN (4)
    611 L(aligned_16_114bytes):
    612 	movdqa	%xmm0, -114(%edx)
    613 L(aligned_16_98bytes):
    614 	movdqa	%xmm0, -98(%edx)
    615 L(aligned_16_82bytes):
    616 	movdqa	%xmm0, -82(%edx)
    617 L(aligned_16_66bytes):
    618 	movdqa	%xmm0, -66(%edx)
    619 L(aligned_16_50bytes):
    620 	movdqa	%xmm0, -50(%edx)
    621 L(aligned_16_34bytes):
    622 	movdqa	%xmm0, -34(%edx)
    623 L(aligned_16_18bytes):
    624 	movdqa	%xmm0, -18(%edx)
    625 L(aligned_16_2bytes):
    626 	movw	%ax, -2(%edx)
    627 	SETRTNVAL
    628 	RETURN
    629 
    630 	ALIGN (4)
    631 L(aligned_16_115bytes):
    632 	movdqa	%xmm0, -115(%edx)
    633 L(aligned_16_99bytes):
    634 	movdqa	%xmm0, -99(%edx)
    635 L(aligned_16_83bytes):
    636 	movdqa	%xmm0, -83(%edx)
    637 L(aligned_16_67bytes):
    638 	movdqa	%xmm0, -67(%edx)
    639 L(aligned_16_51bytes):
    640 	movdqa	%xmm0, -51(%edx)
    641 L(aligned_16_35bytes):
    642 	movdqa	%xmm0, -35(%edx)
    643 L(aligned_16_19bytes):
    644 	movdqa	%xmm0, -19(%edx)
    645 L(aligned_16_3bytes):
    646 	movw	%ax, -3(%edx)
    647 	movb	%al, -1(%edx)
    648 	SETRTNVAL
    649 	RETURN
    650 
    651 	ALIGN (4)
    652 L(aligned_16_116bytes):
    653 	movdqa	%xmm0, -116(%edx)
    654 L(aligned_16_100bytes):
    655 	movdqa	%xmm0, -100(%edx)
    656 L(aligned_16_84bytes):
    657 	movdqa	%xmm0, -84(%edx)
    658 L(aligned_16_68bytes):
    659 	movdqa	%xmm0, -68(%edx)
    660 L(aligned_16_52bytes):
    661 	movdqa	%xmm0, -52(%edx)
    662 L(aligned_16_36bytes):
    663 	movdqa	%xmm0, -36(%edx)
    664 L(aligned_16_20bytes):
    665 	movdqa	%xmm0, -20(%edx)
    666 L(aligned_16_4bytes):
    667 	movl	%eax, -4(%edx)
    668 	SETRTNVAL
    669 	RETURN
    670 
    671 	ALIGN (4)
    672 L(aligned_16_117bytes):
    673 	movdqa	%xmm0, -117(%edx)
    674 L(aligned_16_101bytes):
    675 	movdqa	%xmm0, -101(%edx)
    676 L(aligned_16_85bytes):
    677 	movdqa	%xmm0, -85(%edx)
    678 L(aligned_16_69bytes):
    679 	movdqa	%xmm0, -69(%edx)
    680 L(aligned_16_53bytes):
    681 	movdqa	%xmm0, -53(%edx)
    682 L(aligned_16_37bytes):
    683 	movdqa	%xmm0, -37(%edx)
    684 L(aligned_16_21bytes):
    685 	movdqa	%xmm0, -21(%edx)
    686 L(aligned_16_5bytes):
    687 	movl	%eax, -5(%edx)
    688 	movb	%al, -1(%edx)
    689 	SETRTNVAL
    690 	RETURN
    691 
    692 	ALIGN (4)
    693 L(aligned_16_118bytes):
    694 	movdqa	%xmm0, -118(%edx)
    695 L(aligned_16_102bytes):
    696 	movdqa	%xmm0, -102(%edx)
    697 L(aligned_16_86bytes):
    698 	movdqa	%xmm0, -86(%edx)
    699 L(aligned_16_70bytes):
    700 	movdqa	%xmm0, -70(%edx)
    701 L(aligned_16_54bytes):
    702 	movdqa	%xmm0, -54(%edx)
    703 L(aligned_16_38bytes):
    704 	movdqa	%xmm0, -38(%edx)
    705 L(aligned_16_22bytes):
    706 	movdqa	%xmm0, -22(%edx)
    707 L(aligned_16_6bytes):
    708 	movl	%eax, -6(%edx)
    709 	movw	%ax, -2(%edx)
    710 	SETRTNVAL
    711 	RETURN
    712 
    713 	ALIGN (4)
    714 L(aligned_16_119bytes):
    715 	movdqa	%xmm0, -119(%edx)
    716 L(aligned_16_103bytes):
    717 	movdqa	%xmm0, -103(%edx)
    718 L(aligned_16_87bytes):
    719 	movdqa	%xmm0, -87(%edx)
    720 L(aligned_16_71bytes):
    721 	movdqa	%xmm0, -71(%edx)
    722 L(aligned_16_55bytes):
    723 	movdqa	%xmm0, -55(%edx)
    724 L(aligned_16_39bytes):
    725 	movdqa	%xmm0, -39(%edx)
    726 L(aligned_16_23bytes):
    727 	movdqa	%xmm0, -23(%edx)
    728 L(aligned_16_7bytes):
    729 	movl	%eax, -7(%edx)
    730 	movw	%ax, -3(%edx)
    731 	movb	%al, -1(%edx)
    732 	SETRTNVAL
    733 	RETURN
    734 
    735 	ALIGN (4)
    736 L(aligned_16_120bytes):
    737 	movdqa	%xmm0, -120(%edx)
    738 L(aligned_16_104bytes):
    739 	movdqa	%xmm0, -104(%edx)
    740 L(aligned_16_88bytes):
    741 	movdqa	%xmm0, -88(%edx)
    742 L(aligned_16_72bytes):
    743 	movdqa	%xmm0, -72(%edx)
    744 L(aligned_16_56bytes):
    745 	movdqa	%xmm0, -56(%edx)
    746 L(aligned_16_40bytes):
    747 	movdqa	%xmm0, -40(%edx)
    748 L(aligned_16_24bytes):
    749 	movdqa	%xmm0, -24(%edx)
    750 L(aligned_16_8bytes):
    751 	movq	%xmm0, -8(%edx)
    752 	SETRTNVAL
    753 	RETURN
    754 
    755 	ALIGN (4)
    756 L(aligned_16_121bytes):
    757 	movdqa	%xmm0, -121(%edx)
    758 L(aligned_16_105bytes):
    759 	movdqa	%xmm0, -105(%edx)
    760 L(aligned_16_89bytes):
    761 	movdqa	%xmm0, -89(%edx)
    762 L(aligned_16_73bytes):
    763 	movdqa	%xmm0, -73(%edx)
    764 L(aligned_16_57bytes):
    765 	movdqa	%xmm0, -57(%edx)
    766 L(aligned_16_41bytes):
    767 	movdqa	%xmm0, -41(%edx)
    768 L(aligned_16_25bytes):
    769 	movdqa	%xmm0, -25(%edx)
    770 L(aligned_16_9bytes):
    771 	movq	%xmm0, -9(%edx)
    772 	movb	%al, -1(%edx)
    773 	SETRTNVAL
    774 	RETURN
    775 
    776 	ALIGN (4)
    777 L(aligned_16_122bytes):
    778 	movdqa	%xmm0, -122(%edx)
    779 L(aligned_16_106bytes):
    780 	movdqa	%xmm0, -106(%edx)
    781 L(aligned_16_90bytes):
    782 	movdqa	%xmm0, -90(%edx)
    783 L(aligned_16_74bytes):
    784 	movdqa	%xmm0, -74(%edx)
    785 L(aligned_16_58bytes):
    786 	movdqa	%xmm0, -58(%edx)
    787 L(aligned_16_42bytes):
    788 	movdqa	%xmm0, -42(%edx)
    789 L(aligned_16_26bytes):
    790 	movdqa	%xmm0, -26(%edx)
    791 L(aligned_16_10bytes):
    792 	movq	%xmm0, -10(%edx)
    793 	movw	%ax, -2(%edx)
    794 	SETRTNVAL
    795 	RETURN
    796 
    797 	ALIGN (4)
    798 L(aligned_16_123bytes):
    799 	movdqa	%xmm0, -123(%edx)
    800 L(aligned_16_107bytes):
    801 	movdqa	%xmm0, -107(%edx)
    802 L(aligned_16_91bytes):
    803 	movdqa	%xmm0, -91(%edx)
    804 L(aligned_16_75bytes):
    805 	movdqa	%xmm0, -75(%edx)
    806 L(aligned_16_59bytes):
    807 	movdqa	%xmm0, -59(%edx)
    808 L(aligned_16_43bytes):
    809 	movdqa	%xmm0, -43(%edx)
    810 L(aligned_16_27bytes):
    811 	movdqa	%xmm0, -27(%edx)
    812 L(aligned_16_11bytes):
    813 	movq	%xmm0, -11(%edx)
    814 	movw	%ax, -3(%edx)
    815 	movb	%al, -1(%edx)
    816 	SETRTNVAL
    817 	RETURN
    818 
    819 	ALIGN (4)
    820 L(aligned_16_124bytes):
    821 	movdqa	%xmm0, -124(%edx)
    822 L(aligned_16_108bytes):
    823 	movdqa	%xmm0, -108(%edx)
    824 L(aligned_16_92bytes):
    825 	movdqa	%xmm0, -92(%edx)
    826 L(aligned_16_76bytes):
    827 	movdqa	%xmm0, -76(%edx)
    828 L(aligned_16_60bytes):
    829 	movdqa	%xmm0, -60(%edx)
    830 L(aligned_16_44bytes):
    831 	movdqa	%xmm0, -44(%edx)
    832 L(aligned_16_28bytes):
    833 	movdqa	%xmm0, -28(%edx)
    834 L(aligned_16_12bytes):
    835 	movq	%xmm0, -12(%edx)
    836 	movl	%eax, -4(%edx)
    837 	SETRTNVAL
    838 	RETURN
    839 
    840 	ALIGN (4)
    841 L(aligned_16_125bytes):
    842 	movdqa	%xmm0, -125(%edx)
    843 L(aligned_16_109bytes):
    844 	movdqa	%xmm0, -109(%edx)
    845 L(aligned_16_93bytes):
    846 	movdqa	%xmm0, -93(%edx)
    847 L(aligned_16_77bytes):
    848 	movdqa	%xmm0, -77(%edx)
    849 L(aligned_16_61bytes):
    850 	movdqa	%xmm0, -61(%edx)
    851 L(aligned_16_45bytes):
    852 	movdqa	%xmm0, -45(%edx)
    853 L(aligned_16_29bytes):
    854 	movdqa	%xmm0, -29(%edx)
    855 L(aligned_16_13bytes):
    856 	movq	%xmm0, -13(%edx)
    857 	movl	%eax, -5(%edx)
    858 	movb	%al, -1(%edx)
    859 	SETRTNVAL
    860 	RETURN
    861 
    862 	ALIGN (4)
    863 L(aligned_16_126bytes):
    864 	movdqa	%xmm0, -126(%edx)
    865 L(aligned_16_110bytes):
    866 	movdqa	%xmm0, -110(%edx)
    867 L(aligned_16_94bytes):
    868 	movdqa	%xmm0, -94(%edx)
    869 L(aligned_16_78bytes):
    870 	movdqa	%xmm0, -78(%edx)
    871 L(aligned_16_62bytes):
    872 	movdqa	%xmm0, -62(%edx)
    873 L(aligned_16_46bytes):
    874 	movdqa	%xmm0, -46(%edx)
    875 L(aligned_16_30bytes):
    876 	movdqa	%xmm0, -30(%edx)
    877 L(aligned_16_14bytes):
    878 	movq	%xmm0, -14(%edx)
    879 	movl	%eax, -6(%edx)
    880 	movw	%ax, -2(%edx)
    881 	SETRTNVAL
    882 	RETURN
    883 
    884 	ALIGN (4)
    885 L(aligned_16_127bytes):
    886 	movdqa	%xmm0, -127(%edx)
    887 L(aligned_16_111bytes):
    888 	movdqa	%xmm0, -111(%edx)
    889 L(aligned_16_95bytes):
    890 	movdqa	%xmm0, -95(%edx)
    891 L(aligned_16_79bytes):
    892 	movdqa	%xmm0, -79(%edx)
    893 L(aligned_16_63bytes):
    894 	movdqa	%xmm0, -63(%edx)
    895 L(aligned_16_47bytes):
    896 	movdqa	%xmm0, -47(%edx)
    897 L(aligned_16_31bytes):
    898 	movdqa	%xmm0, -31(%edx)
    899 L(aligned_16_15bytes):
    900 	movq	%xmm0, -15(%edx)
    901 	movl	%eax, -7(%edx)
    902 	movw	%ax, -3(%edx)
    903 	movb	%al, -1(%edx)
    904 	SETRTNVAL
    905 	RETURN_END
    906 
    907 END (sse2_memset5_atom)
    908