Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #ifndef L
        /* L(label) names an assembler-local symbol by prefixing ".L".  */
     32 # define L(label)	.L##label
     33 #endif
     34 
     35 #ifndef ALIGN
        /* ALIGN(n) expands to .p2align n; the argument is a power-of-two
           exponent, so ALIGN (4) requests 16-byte alignment.  */
     36 # define ALIGN(n)	.p2align n
     37 #endif
     38 
     39 #ifndef cfi_startproc
        /* The cfi_* macros below forward to the assembler's .cfi_* unwind
           directives.  Each is defined only if not already provided.  */
     40 # define cfi_startproc			.cfi_startproc
     41 #endif
     42 
     43 #ifndef cfi_endproc
     44 # define cfi_endproc			.cfi_endproc
     45 #endif
     46 
     47 #ifndef cfi_rel_offset
     48 # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
     49 #endif
     50 
     51 #ifndef cfi_restore
     52 # define cfi_restore(reg)		.cfi_restore reg
     53 #endif
     54 
     55 #ifndef cfi_adjust_cfa_offset
     56 # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
     57 #endif
     58 
     59 #ifndef ENTRY
        /* ENTRY(name) opens a global function symbol: it marks NAME as a
           function, exports it, aligns it with .p2align 4, emits the label
           and starts its CFI region.  */
     60 # define ENTRY(name)			\
     61 	.type name,  @function; 	\
     62 	.globl name;			\
     63 	.p2align 4;			\
     64 name:					\
     65 	cfi_startproc
     66 #endif
     67 
     68 #ifndef END
        /* END(name) closes the CFI region and records the symbol's size as
           the distance from NAME to the current location.  */
     69 # define END(name)			\
     70 	cfi_endproc;			\
     71 	.size name, .-name
     72 #endif
     73 
     74 #define CFI_PUSH(REG)						\
     75   cfi_adjust_cfa_offset (4);					\
     76   cfi_rel_offset (REG, 0)
     77 
        /* CFI_PUSH above records that 4 more bytes are on the stack and that
           REG is saved at the new top of stack; CFI_POP undoes both.  Neither
           emits an instruction, only unwind annotations.  */
     78 #define CFI_POP(REG)						\
     79   cfi_adjust_cfa_offset (-4);					\
     80   cfi_restore (REG)
     81 
        /* PUSH/POP pair the real pushl/popl with the matching unwind
           annotation so the CFA offset tracks the 4-byte stack adjustment.  */
     82 #define PUSH(REG)	pushl REG; CFI_PUSH (REG)
     83 #define POP(REG)	popl REG; CFI_POP (REG)
     84 
     85 #ifdef USE_AS_BZERO
        /* bzero flavour: the stack arguments are (dest, len).  SETRTNVAL is
           empty, so no return value is loaded.  */
     86 # define DEST		PARMS
     87 # define LEN		DEST+4
     88 # define SETRTNVAL
     89 #else
        /* memset flavour: the stack arguments are (dest, chr, len).
           SETRTNVAL reloads the destination pointer into EAX as the
           return value.  PARMS (defined further down) is the offset of
           the first argument from ESP.  */
     90 # define DEST		PARMS
     91 # define CHR		DEST+4
     92 # define LEN		CHR+4
     93 # define SETRTNVAL	movl DEST(%esp), %eax
     94 #endif
     95 
     96 #if (defined SHARED || defined __PIC__)
        /* Position-independent build.  EBX is pushed on entry (the jump-table
           dispatch below overwrites it), so the first argument sits at
           8(%esp).  RETURN restores EBX and returns; its trailing CFI_PUSH
           re-establishes the "EBX still pushed" unwind state for the code
           that follows the return in the file.  RETURN_END omits that
           annotation and is meant for the last return of the function.
           Jump-table entries are stored relative to the table base.  */
     97 # define ENTRANCE	PUSH (%ebx);
     98 # define RETURN_END	POP (%ebx); ret
     99 # define RETURN		RETURN_END; CFI_PUSH (%ebx)
    100 # define PARMS		8		/* Preserve EBX.  */
    101 # define JMPTBL(I, B)	I - B
    102 
    103 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
    104    jump table with relative offsets.  ECX is the entry index; it is also
           added to EDX before the jump.  */
    105 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    106     /* We first load PC into EBX.  */				\
    107     call	__i686.get_pc_thunk.bx;				\
    108     /* Get the address of the jump table.  */			\
    109     add		$(TABLE - .), %ebx;				\
    110     /* Get the entry and convert the relative offset to the	\
    111        absolute address.  */					\
    112     add		(%ebx,%ecx,4), %ebx;				\
    113     add		%ecx, %edx;					\
    114     /* We loaded the jump table and adjusted EDX. Go.  */	\
    115     jmp		*%ebx
    116 
        /* PC thunk: copies its own return address (the address of the
           instruction following the call) into EBX.  */
    117 	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
    118 	.globl	__i686.get_pc_thunk.bx
    119 	.hidden	__i686.get_pc_thunk.bx
    120 	ALIGN (4)
    121 	.type	__i686.get_pc_thunk.bx,@function
    122 __i686.get_pc_thunk.bx:
    123 	movl	(%esp), %ebx
    124 	ret
    125 #else
        /* Non-PIC build.  Nothing is saved on entry, so the first argument
           sits at 4(%esp), and jump-table entries are absolute addresses.  */
    126 # define ENTRANCE
    127 # define RETURN_END	ret
    128 # define RETURN		RETURN_END
    129 # define PARMS		4
    130 # define JMPTBL(I, B)	I
    131 
    132 /* Branch to an entry in a jump table.  TABLE is a jump table with
    133    absolute offsets.  ECX is the entry index; it is also added to EDX
           before the jump.  */
    134 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    135     add		%ecx, %edx;					\
    136     jmp		*TABLE(,%ecx,4)
    137 #endif
    138 
    139 	.section .text.sse2,"ax",@progbits
    140 	ALIGN (4)
        /* sse2_memset5_atom: fill LEN bytes at DEST with the low byte of CHR,
           or with zero when built with USE_AS_BZERO.  Arguments are read from
           the stack at DEST/CHR/LEN(%esp).

           Register roles once the setup below has run:
             ECX = number of bytes still to write
             EDX = destination pointer
             EAX = fill byte replicated into all four bytes of the register  */
    141 ENTRY (sse2_memset5_atom)
    142 	ENTRANCE
    143 
    144 	movl	LEN(%esp), %ecx
    145 #ifdef USE_AS_BZERO
    146 	xor	%eax, %eax
    147 #else
        	/* Zero-extend the fill byte, then duplicate it into AH so the
        	   low word of EAX holds the byte twice.  */
    148 	movzbl	CHR(%esp), %eax
    149 	movb	%al, %ah
    150 	/* Fill the whole EAX with pattern.  */
    151 	movl	%eax, %edx
    152 	shl	$16, %eax
    153 	or	%edx, %eax
    154 #endif
    155 	movl	DEST(%esp), %edx
        	/* Unsigned comparison: 32 bytes or more take the SSE2 path.  */
    156 	cmp	$32, %ecx
    157 	jae	L(32bytesormore)
    158 
        /* Fewer than 32 bytes: dispatch on the byte count in ECX.  The macro
           also advances EDX by ECX, so the targets store at negative offsets
           from the end of the region.  */
    159 L(write_less32bytes):
    160 	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))
    161 
    162 
    163 	.pushsection .rodata.sse2,"a",@progbits
        	/* Jump table for lengths 0..31: entry N targets L(write_Nbytes).
        	   JMPTBL makes each entry table-relative in PIC builds and an
        	   absolute address otherwise.  */
    164 	ALIGN (2)
    165 L(table_less_32bytes):
    166 	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
    167 	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
    168 	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
    169 	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
    170 	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
    171 	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
    172 	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
    173 	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
    174 	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
    175 	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
    176 	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
    177 	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
    178 	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
    179 	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
    180 	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
    181 	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
    182 	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
    183 	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
    184 	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
    185 	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
    186 	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
    187 	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
    188 	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
    189 	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
    190 	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
    191 	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
    192 	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
    193 	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
    194 	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
    195 	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
    196 	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
    197 	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
    198 	.popsection
    199 
    200 	ALIGN (4)
        /* Tails for lengths 0..31, reached through L(table_less_32bytes).
           On entry EDX points one past the last byte to fill and EAX holds
           the replicated fill byte.  Lengths are grouped by N mod 4: each
           chain stores dwords at descending negative offsets, falling through
           label to label, and ends with the store(s) for the 0..3 byte
           remainder before returning.  */

        /* N mod 4 == 0: dword stores only.  */
    201 L(write_28bytes):
    202 	movl	%eax, -28(%edx)
    203 L(write_24bytes):
    204 	movl	%eax, -24(%edx)
    205 L(write_20bytes):
    206 	movl	%eax, -20(%edx)
    207 L(write_16bytes):
    208 	movl	%eax, -16(%edx)
    209 L(write_12bytes):
    210 	movl	%eax, -12(%edx)
    211 L(write_8bytes):
    212 	movl	%eax, -8(%edx)
    213 L(write_4bytes):
    214 	movl	%eax, -4(%edx)
    215 L(write_0bytes):
    216 	SETRTNVAL
    217 	RETURN
    218 
    219 	ALIGN (4)
        /* N mod 4 == 1: dword stores, then one trailing byte.  */
    220 L(write_29bytes):
    221 	movl	%eax, -29(%edx)
    222 L(write_25bytes):
    223 	movl	%eax, -25(%edx)
    224 L(write_21bytes):
    225 	movl	%eax, -21(%edx)
    226 L(write_17bytes):
    227 	movl	%eax, -17(%edx)
    228 L(write_13bytes):
    229 	movl	%eax, -13(%edx)
    230 L(write_9bytes):
    231 	movl	%eax, -9(%edx)
    232 L(write_5bytes):
    233 	movl	%eax, -5(%edx)
    234 L(write_1bytes):
    235 	movb	%al, -1(%edx)
    236 	SETRTNVAL
    237 	RETURN
    238 
    239 	ALIGN (4)
        /* N mod 4 == 2: dword stores, then one trailing word.  */
    240 L(write_30bytes):
    241 	movl	%eax, -30(%edx)
    242 L(write_26bytes):
    243 	movl	%eax, -26(%edx)
    244 L(write_22bytes):
    245 	movl	%eax, -22(%edx)
    246 L(write_18bytes):
    247 	movl	%eax, -18(%edx)
    248 L(write_14bytes):
    249 	movl	%eax, -14(%edx)
    250 L(write_10bytes):
    251 	movl	%eax, -10(%edx)
    252 L(write_6bytes):
    253 	movl	%eax, -6(%edx)
    254 L(write_2bytes):
    255 	movw	%ax, -2(%edx)
    256 	SETRTNVAL
    257 	RETURN
    258 
    259 	ALIGN (4)
        /* N mod 4 == 3: dword stores, then a trailing word and byte.  */
    260 L(write_31bytes):
    261 	movl	%eax, -31(%edx)
    262 L(write_27bytes):
    263 	movl	%eax, -27(%edx)
    264 L(write_23bytes):
    265 	movl	%eax, -23(%edx)
    266 L(write_19bytes):
    267 	movl	%eax, -19(%edx)
    268 L(write_15bytes):
    269 	movl	%eax, -15(%edx)
    270 L(write_11bytes):
    271 	movl	%eax, -11(%edx)
    272 L(write_7bytes):
    273 	movl	%eax, -7(%edx)
    274 L(write_3bytes):
    275 	movw	%ax, -3(%edx)
    276 	movb	%al, -1(%edx)
    277 	SETRTNVAL
    278 	RETURN
    279 
    280 	ALIGN (4)
    281 /* ECX >= 32; the alignment of EDX has not been checked yet.  */
    282 L(32bytesormore):
    283 	/* Fill xmm0 with the pattern.  */
    284 #ifdef USE_AS_BZERO
    285 	pxor	%xmm0, %xmm0
    286 #else
        	/* Move the replicated dword into the low lane, then broadcast
        	   lane 0 to all four dword lanes.  */
    287 	movd	%eax, %xmm0
    288 	pshufd	$0, %xmm0, %xmm0
    289 #endif
    290 	testl	$0xf, %edx
    291 	jz	L(aligned_16)
    292 /* ECX >= 32 and EDX is not 16 byte aligned.  */
    293 L(not_aligned_16):
        	/* Cover the unaligned head with one unaligned 16-byte store,
        	   then round EDX up to the next 16-byte boundary.  EAX is used
        	   as scratch: it ends up holding (old EDX - new EDX), a negative
        	   value whose addition to ECX removes the skipped head bytes
        	   from the remaining count.  */
    294 	movdqu	%xmm0, (%edx)
    295 	movl	%edx, %eax
    296 	and	$-16, %edx
    297 	add	$16, %edx
    298 	sub	%edx, %eax
    299 	add	%eax, %ecx
        	/* Restore the fill pattern in EAX for the tail stores.  */
    300 	movd	%xmm0, %eax
    301 
    302 	ALIGN (4)
        /* EDX is 16-byte aligned here; ECX is the remaining byte count.  */
    303 L(aligned_16):
    304 	cmp	$128, %ecx
    305 	jae	L(128bytesormore)
    306 
        /* Fewer than 128 bytes remain: dispatch on ECX (EDX += ECX).  */
    307 L(aligned_16_less128bytes):
    308 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    309 
    310 	ALIGN (4)
        /* Bulk path: ECX >= 128 and EDX is 16-byte aligned.

           Three strategies are selected by comparing ECX (unsigned) against
           the cache sizes, which come either from the compile-time constants
           SHARED_CACHE_SIZE / DATA_CACHE_SIZE or from the variables
           __x86_shared_cache_size / __x86_data_cache_size:

             ECX >= shared cache size  -> L(128bytesormore_nt_start):
                                          regular stores first, then
                                          non-temporal stores for the rest
             ECX >= data cache size    -> L(128bytes_L2_normal):
                                          128-byte loop with prefetcht0
             otherwise                 -> L(128bytesormore_normal):
                                          plain 128-byte unrolled loop

           Every strategy finishes through L(table_16_128bytes) with fewer
           than 128 bytes left in ECX.

           EBX holds the shared cache size during the first comparison.  It
           is pushed first when a compile-time SHARED_CACHE_SIZE is used and
           in every non-PIC build; in a PIC build without SHARED_CACHE_SIZE
           it was already saved by ENTRANCE and is simply overwritten.  */
    311 L(128bytesormore):
    312 #ifdef SHARED_CACHE_SIZE
    313 	PUSH (%ebx)
    314 	mov	$SHARED_CACHE_SIZE, %ebx
    315 #else
    316 # if (defined SHARED || defined __PIC__)
    317 	call	__i686.get_pc_thunk.bx
    318 	add	$_GLOBAL_OFFSET_TABLE_, %ebx
    319 	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
    320 # else
    321 	PUSH (%ebx)
    322 	mov	__x86_shared_cache_size, %ebx
    323 # endif
    324 #endif
    325 	cmp	%ebx, %ecx
    326 	jae	L(128bytesormore_nt_start)
    327 
    328 
        	/* Undo the PUSH above.  The condition here must be the same one
        	   under which that PUSH was emitted (SHARED_CACHE_SIZE defined,
        	   or a non-PIC build).  Keying the POP on DATA_CACHE_SIZE would
        	   unbalance the stack in a PIC build that defines only one of
        	   the two cache-size macros.  RESTORE_EBX_STATE re-asserts the
        	   "EBX pushed" unwind state in front of the non-temporal path,
        	   which is entered with the extra push still in place.  */
        #if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
        	POP (%ebx)
        # define RESTORE_EBX_STATE CFI_PUSH (%ebx)
        #else
        # define RESTORE_EBX_STATE
        #endif
        	/* popl does not modify EFLAGS and ECX is unchanged, so the
        	   data-cache comparison can follow directly.  In the PIC
        	   variable-lookup case EBX is reloaded as the GOT base; its
        	   caller value is already saved by ENTRANCE.  */
        #ifdef DATA_CACHE_SIZE
        	cmp	$DATA_CACHE_SIZE, %ecx
        #else
        # if (defined SHARED || defined __PIC__)
        	call	__i686.get_pc_thunk.bx
        	add	$_GLOBAL_OFFSET_TABLE_, %ebx
        	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
        # else
        	cmp	__x86_data_cache_size, %ecx
        # endif
        #endif
    345 
    346 	jae	L(128bytes_L2_normal)
        	/* Pre-bias ECX by -128 so that the `sub' inside the loop borrows
        	   exactly when fewer than 128 bytes will remain after the chunk
        	   being written.  `lea' leaves the flags alone, so jb/jae below
        	   still test that `sub'.  */
    347 	subl	$128, %ecx
    348 L(128bytesormore_normal):
    349 	sub	$128, %ecx
    350 	movdqa	%xmm0, (%edx)
    351 	movdqa	%xmm0, 0x10(%edx)
    352 	movdqa	%xmm0, 0x20(%edx)
    353 	movdqa	%xmm0, 0x30(%edx)
    354 	movdqa	%xmm0, 0x40(%edx)
    355 	movdqa	%xmm0, 0x50(%edx)
    356 	movdqa	%xmm0, 0x60(%edx)
    357 	movdqa	%xmm0, 0x70(%edx)
    358 	lea	128(%edx), %edx
    359 	jb	L(128bytesless_normal)
    360 
    361 
        	/* Second copy of the loop body (the loop is unrolled twice).  */
    362 	sub	$128, %ecx
    363 	movdqa	%xmm0, (%edx)
    364 	movdqa	%xmm0, 0x10(%edx)
    365 	movdqa	%xmm0, 0x20(%edx)
    366 	movdqa	%xmm0, 0x30(%edx)
    367 	movdqa	%xmm0, 0x40(%edx)
    368 	movdqa	%xmm0, 0x50(%edx)
    369 	movdqa	%xmm0, 0x60(%edx)
    370 	movdqa	%xmm0, 0x70(%edx)
    371 	lea	128(%edx), %edx
    372 	jae	L(128bytesormore_normal)
    373 
    374 L(128bytesless_normal):
        	/* Remove the bias: ECX is again the true remaining count.  */
    375 	add	$128, %ecx
    376 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    377 
    378 	ALIGN (4)
        /* Length is at least the data cache size: same 128-byte chunking, with
           prefetcht0 issued 0x380 and 0x3c0 bytes ahead of the store pointer.  */
    379 L(128bytes_L2_normal):
    380 	prefetcht0	0x380(%edx)
    381 	prefetcht0	0x3c0(%edx)
    382 	sub	$128, %ecx
    383 	movdqa	%xmm0, (%edx)
    384 	movaps	%xmm0, 0x10(%edx)
    385 	movaps	%xmm0, 0x20(%edx)
    386 	movaps	%xmm0, 0x30(%edx)
    387 	movaps	%xmm0, 0x40(%edx)
    388 	movaps	%xmm0, 0x50(%edx)
    389 	movaps	%xmm0, 0x60(%edx)
    390 	movaps	%xmm0, 0x70(%edx)
    391 	add	$128, %edx
    392 	cmp	$128, %ecx
    393 	jae	L(128bytes_L2_normal)
    394 
    395 L(128bytesless_L2_normal):
    396 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    397 
    398 	RESTORE_EBX_STATE
        /* Length is at least the shared cache size (held in EBX).
           ECX is recomputed as the number of bytes left once the first loop
           below has consumed EBX in whole 128-byte chunks:
             ECX = len - EBX + (EBX mod 128).
           EAX is used as scratch and then reloaded with the fill pattern.  */
    399 L(128bytesormore_nt_start):
    400 	sub	%ebx, %ecx
    401 	mov	%ebx, %eax
    402 	and	$0x7f, %eax
    403 	add	%eax, %ecx
    404 	movd	%xmm0, %eax
    405 	ALIGN (4)
        /* Regular (cached) stores for as many whole 128-byte chunks as fit in
           the shared cache size; EBX counts them down.  */
    406 L(128bytesormore_shared_cache_loop):
    407 	prefetcht0	0x3c0(%edx)
    408 	prefetcht0	0x380(%edx)
    409 	sub	$0x80, %ebx
    410 	movdqa	%xmm0, (%edx)
    411 	movdqa	%xmm0, 0x10(%edx)
    412 	movdqa	%xmm0, 0x20(%edx)
    413 	movdqa	%xmm0, 0x30(%edx)
    414 	movdqa	%xmm0, 0x40(%edx)
    415 	movdqa	%xmm0, 0x50(%edx)
    416 	movdqa	%xmm0, 0x60(%edx)
    417 	movdqa	%xmm0, 0x70(%edx)
    418 	add	$0x80, %edx
    419 	cmp	$0x80, %ebx
    420 	jae	L(128bytesormore_shared_cache_loop)
    421 	cmp	$0x80, %ecx
    422 	jb	L(shared_cache_loop_end)
    423 	ALIGN (4)
        /* Remaining whole 128-byte chunks are written with non-temporal stores;
           the sfence after the loop orders them before returning.  */
    424 L(128bytesormore_nt):
    425 	sub	$0x80, %ecx
    426 	movntdq	%xmm0, (%edx)
    427 	movntdq	%xmm0, 0x10(%edx)
    428 	movntdq	%xmm0, 0x20(%edx)
    429 	movntdq	%xmm0, 0x30(%edx)
    430 	movntdq	%xmm0, 0x40(%edx)
    431 	movntdq	%xmm0, 0x50(%edx)
    432 	movntdq	%xmm0, 0x60(%edx)
    433 	movntdq	%xmm0, 0x70(%edx)
    434 	add	$0x80, %edx
    435 	cmp	$0x80, %ecx
    436 	jae	L(128bytesormore_nt)
    437 	sfence
    438 L(shared_cache_loop_end):
        	/* Balance the PUSH done at L(128bytesormore); same condition as
        	   that PUSH (see the comment on the normal path above).  */
        #if defined SHARED_CACHE_SIZE || !(defined SHARED || defined __PIC__)
    440 	POP (%ebx)
    441 #endif
    442 	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))
    443 
    444 
    445 	.pushsection .rodata.sse2,"a",@progbits
        	/* Jump table for remaining lengths 0..127 on the 16-byte aligned
        	   path: entry N targets L(aligned_16_Nbytes).  JMPTBL makes each
        	   entry table-relative in PIC builds and absolute otherwise.  */
    446 	ALIGN (2)
    447 L(table_16_128bytes):
    448 	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
    449 	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
    450 	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
    451 	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
    452 	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
    453 	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
    454 	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
    455 	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
    456 	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
    457 	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
    458 	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
    459 	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
    460 	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
    461 	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
    462 	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
    463 	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
    464 	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
    465 	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
    466 	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
    467 	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
    468 	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
    469 	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
    470 	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
    471 	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
    472 	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
    473 	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
    474 	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
    475 	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
    476 	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
    477 	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
    478 	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
    479 	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
    480 	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
    481 	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
    482 	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
    483 	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
    484 	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
    485 	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
    486 	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
    487 	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
    488 	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
    489 	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
    490 	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
    491 	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
    492 	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
    493 	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
    494 	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
    495 	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
    496 	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
    497 	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
    498 	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
    499 	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
    500 	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
    501 	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
    502 	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
    503 	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
    504 	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
    505 	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
    506 	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
    507 	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
    508 	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
    509 	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
    510 	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
    511 	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
    512 	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
    513 	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
    514 	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
    515 	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
    516 	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
    517 	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
    518 	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
    519 	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
    520 	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
    521 	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
    522 	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
    523 	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
    524 	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
    525 	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
    526 	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
    527 	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
    528 	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
    529 	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
    530 	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
    531 	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
    532 	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
    533 	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
    534 	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
    535 	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
    536 	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
    537 	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
    538 	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
    539 	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
    540 	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
    541 	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
    542 	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
    543 	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
    544 	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
    545 	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
    546 	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
    547 	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
    548 	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
    549 	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
    550 	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
    551 	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
    552 	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
    553 	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
    554 	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
    555 	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
    556 	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
    557 	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
    558 	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
    559 	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
    560 	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
    561 	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
    562 	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
    563 	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
    564 	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
    565 	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
    566 	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
    567 	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
    568 	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
    569 	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
    570 	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
    571 	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
    572 	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
    573 	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
    574 	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
    575 	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
    576 	.popsection
    577 
    578 	ALIGN (4)
        /* Tails for 0..127 remaining bytes on the aligned path, reached through
           L(table_16_128bytes).  On entry EDX points one past the last byte to
           fill (the dispatch macro added ECX to a 16-byte aligned pointer),
           xmm0 holds the 16-byte pattern and EAX the 4-byte pattern.

           Lengths are grouped by N mod 16.  Within a group every movdqa
           address is (EDX - N + 16*k), i.e. 16-byte aligned as movdqa
           requires; control falls through from label to label and ends with
           the narrower stores for the final N mod 16 bytes.  This block holds
           the groups with remainder 0..7.  */

        /* N mod 16 == 0: 16-byte stores only.  */
    579 L(aligned_16_112bytes):
    580 	movdqa	%xmm0, -112(%edx)
    581 L(aligned_16_96bytes):
    582 	movdqa	%xmm0, -96(%edx)
    583 L(aligned_16_80bytes):
    584 	movdqa	%xmm0, -80(%edx)
    585 L(aligned_16_64bytes):
    586 	movdqa	%xmm0, -64(%edx)
    587 L(aligned_16_48bytes):
    588 	movdqa	%xmm0, -48(%edx)
    589 L(aligned_16_32bytes):
    590 	movdqa	%xmm0, -32(%edx)
    591 L(aligned_16_16bytes):
    592 	movdqa	%xmm0, -16(%edx)
    593 L(aligned_16_0bytes):
    594 	SETRTNVAL
    595 	RETURN
    596 
    597 	ALIGN (4)
        /* N mod 16 == 1: final byte.  */
    598 L(aligned_16_113bytes):
    599 	movdqa	%xmm0, -113(%edx)
    600 L(aligned_16_97bytes):
    601 	movdqa	%xmm0, -97(%edx)
    602 L(aligned_16_81bytes):
    603 	movdqa	%xmm0, -81(%edx)
    604 L(aligned_16_65bytes):
    605 	movdqa	%xmm0, -65(%edx)
    606 L(aligned_16_49bytes):
    607 	movdqa	%xmm0, -49(%edx)
    608 L(aligned_16_33bytes):
    609 	movdqa	%xmm0, -33(%edx)
    610 L(aligned_16_17bytes):
    611 	movdqa	%xmm0, -17(%edx)
    612 L(aligned_16_1bytes):
    613 	movb	%al, -1(%edx)
    614 	SETRTNVAL
    615 	RETURN
    616 
    617 	ALIGN (4)
        /* N mod 16 == 2: final word.  */
    618 L(aligned_16_114bytes):
    619 	movdqa	%xmm0, -114(%edx)
    620 L(aligned_16_98bytes):
    621 	movdqa	%xmm0, -98(%edx)
    622 L(aligned_16_82bytes):
    623 	movdqa	%xmm0, -82(%edx)
    624 L(aligned_16_66bytes):
    625 	movdqa	%xmm0, -66(%edx)
    626 L(aligned_16_50bytes):
    627 	movdqa	%xmm0, -50(%edx)
    628 L(aligned_16_34bytes):
    629 	movdqa	%xmm0, -34(%edx)
    630 L(aligned_16_18bytes):
    631 	movdqa	%xmm0, -18(%edx)
    632 L(aligned_16_2bytes):
    633 	movw	%ax, -2(%edx)
    634 	SETRTNVAL
    635 	RETURN
    636 
    637 	ALIGN (4)
        /* N mod 16 == 3: final word + byte.  */
    638 L(aligned_16_115bytes):
    639 	movdqa	%xmm0, -115(%edx)
    640 L(aligned_16_99bytes):
    641 	movdqa	%xmm0, -99(%edx)
    642 L(aligned_16_83bytes):
    643 	movdqa	%xmm0, -83(%edx)
    644 L(aligned_16_67bytes):
    645 	movdqa	%xmm0, -67(%edx)
    646 L(aligned_16_51bytes):
    647 	movdqa	%xmm0, -51(%edx)
    648 L(aligned_16_35bytes):
    649 	movdqa	%xmm0, -35(%edx)
    650 L(aligned_16_19bytes):
    651 	movdqa	%xmm0, -19(%edx)
    652 L(aligned_16_3bytes):
    653 	movw	%ax, -3(%edx)
    654 	movb	%al, -1(%edx)
    655 	SETRTNVAL
    656 	RETURN
    657 
    658 	ALIGN (4)
        /* N mod 16 == 4: final dword.  */
    659 L(aligned_16_116bytes):
    660 	movdqa	%xmm0, -116(%edx)
    661 L(aligned_16_100bytes):
    662 	movdqa	%xmm0, -100(%edx)
    663 L(aligned_16_84bytes):
    664 	movdqa	%xmm0, -84(%edx)
    665 L(aligned_16_68bytes):
    666 	movdqa	%xmm0, -68(%edx)
    667 L(aligned_16_52bytes):
    668 	movdqa	%xmm0, -52(%edx)
    669 L(aligned_16_36bytes):
    670 	movdqa	%xmm0, -36(%edx)
    671 L(aligned_16_20bytes):
    672 	movdqa	%xmm0, -20(%edx)
    673 L(aligned_16_4bytes):
    674 	movl	%eax, -4(%edx)
    675 	SETRTNVAL
    676 	RETURN
    677 
    678 	ALIGN (4)
        /* N mod 16 == 5: final dword + byte.  */
    679 L(aligned_16_117bytes):
    680 	movdqa	%xmm0, -117(%edx)
    681 L(aligned_16_101bytes):
    682 	movdqa	%xmm0, -101(%edx)
    683 L(aligned_16_85bytes):
    684 	movdqa	%xmm0, -85(%edx)
    685 L(aligned_16_69bytes):
    686 	movdqa	%xmm0, -69(%edx)
    687 L(aligned_16_53bytes):
    688 	movdqa	%xmm0, -53(%edx)
    689 L(aligned_16_37bytes):
    690 	movdqa	%xmm0, -37(%edx)
    691 L(aligned_16_21bytes):
    692 	movdqa	%xmm0, -21(%edx)
    693 L(aligned_16_5bytes):
    694 	movl	%eax, -5(%edx)
    695 	movb	%al, -1(%edx)
    696 	SETRTNVAL
    697 	RETURN
    698 
    699 	ALIGN (4)
        /* N mod 16 == 6: final dword + word.  */
    700 L(aligned_16_118bytes):
    701 	movdqa	%xmm0, -118(%edx)
    702 L(aligned_16_102bytes):
    703 	movdqa	%xmm0, -102(%edx)
    704 L(aligned_16_86bytes):
    705 	movdqa	%xmm0, -86(%edx)
    706 L(aligned_16_70bytes):
    707 	movdqa	%xmm0, -70(%edx)
    708 L(aligned_16_54bytes):
    709 	movdqa	%xmm0, -54(%edx)
    710 L(aligned_16_38bytes):
    711 	movdqa	%xmm0, -38(%edx)
    712 L(aligned_16_22bytes):
    713 	movdqa	%xmm0, -22(%edx)
    714 L(aligned_16_6bytes):
    715 	movl	%eax, -6(%edx)
    716 	movw	%ax, -2(%edx)
    717 	SETRTNVAL
    718 	RETURN
    719 
    720 	ALIGN (4)
        /* N mod 16 == 7: final dword + word + byte.  */
    721 L(aligned_16_119bytes):
    722 	movdqa	%xmm0, -119(%edx)
    723 L(aligned_16_103bytes):
    724 	movdqa	%xmm0, -103(%edx)
    725 L(aligned_16_87bytes):
    726 	movdqa	%xmm0, -87(%edx)
    727 L(aligned_16_71bytes):
    728 	movdqa	%xmm0, -71(%edx)
    729 L(aligned_16_55bytes):
    730 	movdqa	%xmm0, -55(%edx)
    731 L(aligned_16_39bytes):
    732 	movdqa	%xmm0, -39(%edx)
    733 L(aligned_16_23bytes):
    734 	movdqa	%xmm0, -23(%edx)
    735 L(aligned_16_7bytes):
    736 	movl	%eax, -7(%edx)
    737 	movw	%ax, -3(%edx)
    738 	movb	%al, -1(%edx)
    739 	SETRTNVAL
    740 	RETURN
    741 
    742 	ALIGN (4)
        /* Tails on the aligned path for N mod 16 in 8..15.  Same scheme as the
           0..7 groups: EDX points one past the last byte to fill, movdqa
           stores descend in 16-byte steps from (EDX - N), and the final
           remainder starts with movq, which stores the low 8 bytes of xmm0,
           followed by dword/word/byte stores from EAX as needed.  */

        /* N mod 16 == 8: final qword.  */
    743 L(aligned_16_120bytes):
    744 	movdqa	%xmm0, -120(%edx)
    745 L(aligned_16_104bytes):
    746 	movdqa	%xmm0, -104(%edx)
    747 L(aligned_16_88bytes):
    748 	movdqa	%xmm0, -88(%edx)
    749 L(aligned_16_72bytes):
    750 	movdqa	%xmm0, -72(%edx)
    751 L(aligned_16_56bytes):
    752 	movdqa	%xmm0, -56(%edx)
    753 L(aligned_16_40bytes):
    754 	movdqa	%xmm0, -40(%edx)
    755 L(aligned_16_24bytes):
    756 	movdqa	%xmm0, -24(%edx)
    757 L(aligned_16_8bytes):
    758 	movq	%xmm0, -8(%edx)
    759 	SETRTNVAL
    760 	RETURN
    761 
    762 	ALIGN (4)
        /* N mod 16 == 9: final qword + byte.  */
    763 L(aligned_16_121bytes):
    764 	movdqa	%xmm0, -121(%edx)
    765 L(aligned_16_105bytes):
    766 	movdqa	%xmm0, -105(%edx)
    767 L(aligned_16_89bytes):
    768 	movdqa	%xmm0, -89(%edx)
    769 L(aligned_16_73bytes):
    770 	movdqa	%xmm0, -73(%edx)
    771 L(aligned_16_57bytes):
    772 	movdqa	%xmm0, -57(%edx)
    773 L(aligned_16_41bytes):
    774 	movdqa	%xmm0, -41(%edx)
    775 L(aligned_16_25bytes):
    776 	movdqa	%xmm0, -25(%edx)
    777 L(aligned_16_9bytes):
    778 	movq	%xmm0, -9(%edx)
    779 	movb	%al, -1(%edx)
    780 	SETRTNVAL
    781 	RETURN
    782 
    783 	ALIGN (4)
        /* N mod 16 == 10: final qword + word.  */
    784 L(aligned_16_122bytes):
    785 	movdqa	%xmm0, -122(%edx)
    786 L(aligned_16_106bytes):
    787 	movdqa	%xmm0, -106(%edx)
    788 L(aligned_16_90bytes):
    789 	movdqa	%xmm0, -90(%edx)
    790 L(aligned_16_74bytes):
    791 	movdqa	%xmm0, -74(%edx)
    792 L(aligned_16_58bytes):
    793 	movdqa	%xmm0, -58(%edx)
    794 L(aligned_16_42bytes):
    795 	movdqa	%xmm0, -42(%edx)
    796 L(aligned_16_26bytes):
    797 	movdqa	%xmm0, -26(%edx)
    798 L(aligned_16_10bytes):
    799 	movq	%xmm0, -10(%edx)
    800 	movw	%ax, -2(%edx)
    801 	SETRTNVAL
    802 	RETURN
    803 
    804 	ALIGN (4)
        /* N mod 16 == 11: final qword + word + byte.  */
    805 L(aligned_16_123bytes):
    806 	movdqa	%xmm0, -123(%edx)
    807 L(aligned_16_107bytes):
    808 	movdqa	%xmm0, -107(%edx)
    809 L(aligned_16_91bytes):
    810 	movdqa	%xmm0, -91(%edx)
    811 L(aligned_16_75bytes):
    812 	movdqa	%xmm0, -75(%edx)
    813 L(aligned_16_59bytes):
    814 	movdqa	%xmm0, -59(%edx)
    815 L(aligned_16_43bytes):
    816 	movdqa	%xmm0, -43(%edx)
    817 L(aligned_16_27bytes):
    818 	movdqa	%xmm0, -27(%edx)
    819 L(aligned_16_11bytes):
    820 	movq	%xmm0, -11(%edx)
    821 	movw	%ax, -3(%edx)
    822 	movb	%al, -1(%edx)
    823 	SETRTNVAL
    824 	RETURN
    825 
    826 	ALIGN (4)
        /* N mod 16 == 12: final qword + dword.  */
    827 L(aligned_16_124bytes):
    828 	movdqa	%xmm0, -124(%edx)
    829 L(aligned_16_108bytes):
    830 	movdqa	%xmm0, -108(%edx)
    831 L(aligned_16_92bytes):
    832 	movdqa	%xmm0, -92(%edx)
    833 L(aligned_16_76bytes):
    834 	movdqa	%xmm0, -76(%edx)
    835 L(aligned_16_60bytes):
    836 	movdqa	%xmm0, -60(%edx)
    837 L(aligned_16_44bytes):
    838 	movdqa	%xmm0, -44(%edx)
    839 L(aligned_16_28bytes):
    840 	movdqa	%xmm0, -28(%edx)
    841 L(aligned_16_12bytes):
    842 	movq	%xmm0, -12(%edx)
    843 	movl	%eax, -4(%edx)
    844 	SETRTNVAL
    845 	RETURN
    846 
    847 	ALIGN (4)
        /* N mod 16 == 13: final qword + dword + byte.  */
    848 L(aligned_16_125bytes):
    849 	movdqa	%xmm0, -125(%edx)
    850 L(aligned_16_109bytes):
    851 	movdqa	%xmm0, -109(%edx)
    852 L(aligned_16_93bytes):
    853 	movdqa	%xmm0, -93(%edx)
    854 L(aligned_16_77bytes):
    855 	movdqa	%xmm0, -77(%edx)
    856 L(aligned_16_61bytes):
    857 	movdqa	%xmm0, -61(%edx)
    858 L(aligned_16_45bytes):
    859 	movdqa	%xmm0, -45(%edx)
    860 L(aligned_16_29bytes):
    861 	movdqa	%xmm0, -29(%edx)
    862 L(aligned_16_13bytes):
    863 	movq	%xmm0, -13(%edx)
    864 	movl	%eax, -5(%edx)
    865 	movb	%al, -1(%edx)
    866 	SETRTNVAL
    867 	RETURN
    868 
    869 	ALIGN (4)
        /* N mod 16 == 14: final qword + dword + word.  */
    870 L(aligned_16_126bytes):
    871 	movdqa	%xmm0, -126(%edx)
    872 L(aligned_16_110bytes):
    873 	movdqa	%xmm0, -110(%edx)
    874 L(aligned_16_94bytes):
    875 	movdqa	%xmm0, -94(%edx)
    876 L(aligned_16_78bytes):
    877 	movdqa	%xmm0, -78(%edx)
    878 L(aligned_16_62bytes):
    879 	movdqa	%xmm0, -62(%edx)
    880 L(aligned_16_46bytes):
    881 	movdqa	%xmm0, -46(%edx)
    882 L(aligned_16_30bytes):
    883 	movdqa	%xmm0, -30(%edx)
    884 L(aligned_16_14bytes):
    885 	movq	%xmm0, -14(%edx)
    886 	movl	%eax, -6(%edx)
    887 	movw	%ax, -2(%edx)
    888 	SETRTNVAL
    889 	RETURN
    890 
    891 	ALIGN (4)
        /* N mod 16 == 15: final qword + dword + word + byte.  This is the last
           return in the function, so it uses RETURN_END, which omits the
           trailing CFI re-push that RETURN emits in PIC builds.  */
    892 L(aligned_16_127bytes):
    893 	movdqa	%xmm0, -127(%edx)
    894 L(aligned_16_111bytes):
    895 	movdqa	%xmm0, -111(%edx)
    896 L(aligned_16_95bytes):
    897 	movdqa	%xmm0, -95(%edx)
    898 L(aligned_16_79bytes):
    899 	movdqa	%xmm0, -79(%edx)
    900 L(aligned_16_63bytes):
    901 	movdqa	%xmm0, -63(%edx)
    902 L(aligned_16_47bytes):
    903 	movdqa	%xmm0, -47(%edx)
    904 L(aligned_16_31bytes):
    905 	movdqa	%xmm0, -31(%edx)
    906 L(aligned_16_15bytes):
    907 	movq	%xmm0, -15(%edx)
    908 	movl	%eax, -7(%edx)
    909 	movw	%ax, -3(%edx)
    910 	movb	%al, -1(%edx)
    911 	SETRTNVAL
    912 	RETURN_END
    913 
    914 END (sse2_memset5_atom)
    915