Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2010, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #include <private/bionic_asm.h>
     32 
     33 #include "cache.h"
     34 
     35 #ifndef L
        /* L(name) expands to .Lname.  GNU as treats symbols that start with
           .L as assembler-local on ELF targets, so labels written through
           this macro are not exported from the object file.  */
     36 # define L(label)	.L##label
     37 #endif
     38 
     39 #ifndef ALIGN
        /* ALIGN(n) pads the location counter to a 2^n-byte boundary.  */
     40 # define ALIGN(n)	.p2align n
     41 #endif
     42 
     43 #define CFI_PUSH(REG)						\
          /* Unwind annotation for a 4-byte push of REG: the CFA offset	\
             grows by 4 and REG is recorded as saved at 0(%esp).  */	\
     44   .cfi_adjust_cfa_offset 4;					\
     45   .cfi_rel_offset REG, 0
     46 
     47 #define CFI_POP(REG)						\
          /* Unwind annotation for the matching pop: the CFA offset	\
             shrinks by 4 and REG is marked as restored.  */		\
     48   .cfi_adjust_cfa_offset -4;					\
     49   .cfi_restore REG
     50 
        /* PUSH/POP pair the real stack instruction with its unwind annotation.  */
     51 #define PUSH(REG)	pushl REG; CFI_PUSH(REG)
     52 #define POP(REG)	popl REG; CFI_POP(REG)
     53 
     54 #define PARMS 8  /* 4 (return address) + 4 (EBX pushed by ENTRANCE). */
        /* Offsets of the stack arguments from ESP, valid while exactly one
           register (EBX) is pushed.  */
     55 #define DST PARMS
     56 #define CHR (DST+4)
     57 #define LEN (CHR+4)
     58 #define CHK_DST_LEN (LEN+4)
        /* Load the return value: EAX = DST.  */
     59 #define SETRTNVAL	movl DST(%esp), %eax
     60 
        /* ENTRANCE saves EBX; RETURN_END restores it and returns.  RETURN is
           for returns that are followed by more code of the same function:
           after the ret it re-declares EBX as pushed so the unwind info for
           the following instructions stays correct.  */
     61 #define ENTRANCE	PUSH(%ebx);
     62 #define RETURN_END	POP(%ebx); ret
     63 #define RETURN		RETURN_END; CFI_PUSH(%ebx)
        /* Jump-table entry: offset of label I from the table base B.  */
     64 #define JMPTBL(I, B)	I - B
     65 
     66 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
     67    jump table with relative offsets.
           In:   ECX = table index, EDX = address of the first byte that is
                 still to be written.
           Out:  EDX = EDX + ECX, so the branch target reaches the remaining
                 bytes through negative offsets from EDX.
           Clobbers EBX and the flags.  The thunk returns the address of the
           instruction after the call, which is the first add, so adding
           (TABLE - .) in that instruction yields the address of TABLE.   */
     68 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
     69     /* We first load PC into EBX.  */				\
     70     call	__x86.get_pc_thunk.bx;				\
     71     /* Get the address of the jump table.  */			\
     72     add		$(TABLE - .), %ebx;				\
     73     /* Get the entry and convert the relative offset to the	\
     74        absolute address.  */					\
     75     add		(%ebx,%ecx,4), %ebx;				\
     76     add		%ecx, %edx;					\
     77     /* We loaded the jump table and adjusted EDX. Go.  */	\
     78     jmp		*%ebx
     79 
     80 	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
        /* __x86.get_pc_thunk.bx: helper used by BRANCH_TO_JMPTBL_ENTRY to
           obtain the program counter.  It copies its own return address,
           i.e. the address of the instruction following the call, into EBX
           and returns.  Only EBX is modified.  The symbol is global but
           hidden, and lives in its own .gnu.linkonce section.  */
     81 	.globl	__x86.get_pc_thunk.bx
     82 	.hidden	__x86.get_pc_thunk.bx
     83 	ALIGN(4)
     84 	.type	__x86.get_pc_thunk.bx,@function
     85 __x86.get_pc_thunk.bx:
     86 	movl	(%esp), %ebx
     87 	ret
     88 
     89 ENTRY(__memset_chk)
        /* Length-checked entry point.  Takes the same stack arguments as
           memset (DST, CHR, LEN) plus a fourth one, CHK_DST_LEN.
           If LEN is above CHK_DST_LEN (unsigned compare), EBX is popped again
           and control jumps to __memset_chk_fail with the stack exactly as it
           was on entry.  Otherwise execution continues inside memset at
           L(memset_length_loaded) with ECX = LEN and EBX still pushed.  */
     90   ENTRANCE
     91 
     92   movl LEN(%esp), %ecx
     93   cmpl CHK_DST_LEN(%esp), %ecx
     94   jna L(memset_length_loaded)
     95 
     96   POP(%ebx) // Undo ENTRANCE without returning.
     97   jmp __memset_chk_fail
     98 END(__memset_chk)
     99 
    100 	.section .text.sse2,"ax",@progbits
    101 	ALIGN(4)
    102 ENTRY(memset)
        /* Fills LEN bytes starting at DST with the low byte of CHR and
           returns DST in EAX.  All three arguments are read from the stack.
           Register roles from here on:
             ECX = number of bytes still to write
             EDX = address of the next byte to write
             EAX = fill byte replicated into all four bytes
           L(memset_length_loaded) is also the join point for __memset_chk,
           which arrives with EBX pushed and ECX = LEN.  */
    103 	ENTRANCE
    104 
    105 	movl	LEN(%esp), %ecx
    106 L(memset_length_loaded):
    107 	movzbl	CHR(%esp), %eax
    108 	movb	%al, %ah
    109 	/* Fill the whole EAX with pattern.  */
    110 	movl	%eax, %edx
    111 	shl	$16, %eax
    112 	or	%edx, %eax
    113 	movl	DST(%esp), %edx
    114 	cmp	$32, %ecx
    115 	jae	L(32bytesormore)
    116 
        /* Fewer than 32 bytes: ECX is used directly as the table index.  */
    117 L(write_less32bytes):
    118 	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))
    119 
    120 
    121 	.pushsection .rodata.sse2,"a",@progbits
        /* Dispatch table for lengths 0..31.  Entry N holds the offset of
           L(write_Nbytes) from the start of the table and is selected by
           BRANCH_TO_JMPTBL_ENTRY with ECX = N.  ALIGN(2) gives the .int
           entries 4-byte alignment.  */
    122 	ALIGN(2)
    123 L(table_less_32bytes):
    124 	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
    125 	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
    126 	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
    127 	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
    128 	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
    129 	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
    130 	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
    131 	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
    132 	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
    133 	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
    134 	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
    135 	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
    136 	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
    137 	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
    138 	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
    139 	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
    140 	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
    141 	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
    142 	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
    143 	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
    144 	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
    145 	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
    146 	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
    147 	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
    148 	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
    149 	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
    150 	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
    151 	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
    152 	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
    153 	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
    154 	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
    155 	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
    156 	.popsection
    157 
    158 	ALIGN(4)
        /* Tails for lengths 28, 24, ..., 4, 0.  EDX points one past the last
           byte to write and EAX holds the 4-byte pattern.  Each label stores
           one dword and falls through to the next shorter length.  */
    159 L(write_28bytes):
    160 	movl	%eax, -28(%edx)
    161 L(write_24bytes):
    162 	movl	%eax, -24(%edx)
    163 L(write_20bytes):
    164 	movl	%eax, -20(%edx)
    165 L(write_16bytes):
    166 	movl	%eax, -16(%edx)
    167 L(write_12bytes):
    168 	movl	%eax, -12(%edx)
    169 L(write_8bytes):
    170 	movl	%eax, -8(%edx)
    171 L(write_4bytes):
    172 	movl	%eax, -4(%edx)
    173 L(write_0bytes):
    174 	SETRTNVAL
    175 	RETURN
    176 
    177 	ALIGN(4)
        /* Tails for lengths 29, 25, ..., 5, 1: a fall-through run of dword
           stores followed by one byte store for the last byte.  EDX points
           one past the last byte to write.  */
    178 L(write_29bytes):
    179 	movl	%eax, -29(%edx)
    180 L(write_25bytes):
    181 	movl	%eax, -25(%edx)
    182 L(write_21bytes):
    183 	movl	%eax, -21(%edx)
    184 L(write_17bytes):
    185 	movl	%eax, -17(%edx)
    186 L(write_13bytes):
    187 	movl	%eax, -13(%edx)
    188 L(write_9bytes):
    189 	movl	%eax, -9(%edx)
    190 L(write_5bytes):
    191 	movl	%eax, -5(%edx)
    192 L(write_1bytes):
    193 	movb	%al, -1(%edx)
    194 	SETRTNVAL
    195 	RETURN
    196 
    197 	ALIGN(4)
        /* Tails for lengths 30, 26, ..., 6, 2: a fall-through run of dword
           stores followed by one word store for the last two bytes.  EDX
           points one past the last byte to write.  */
    198 L(write_30bytes):
    199 	movl	%eax, -30(%edx)
    200 L(write_26bytes):
    201 	movl	%eax, -26(%edx)
    202 L(write_22bytes):
    203 	movl	%eax, -22(%edx)
    204 L(write_18bytes):
    205 	movl	%eax, -18(%edx)
    206 L(write_14bytes):
    207 	movl	%eax, -14(%edx)
    208 L(write_10bytes):
    209 	movl	%eax, -10(%edx)
    210 L(write_6bytes):
    211 	movl	%eax, -6(%edx)
    212 L(write_2bytes):
    213 	movw	%ax, -2(%edx)
    214 	SETRTNVAL
    215 	RETURN
    216 
    217 	ALIGN(4)
        /* Tails for lengths 31, 27, ..., 7, 3: a fall-through run of dword
           stores followed by a word store and a byte store for the last
           three bytes.  EDX points one past the last byte to write.  */
    218 L(write_31bytes):
    219 	movl	%eax, -31(%edx)
    220 L(write_27bytes):
    221 	movl	%eax, -27(%edx)
    222 L(write_23bytes):
    223 	movl	%eax, -23(%edx)
    224 L(write_19bytes):
    225 	movl	%eax, -19(%edx)
    226 L(write_15bytes):
    227 	movl	%eax, -15(%edx)
    228 L(write_11bytes):
    229 	movl	%eax, -11(%edx)
    230 L(write_7bytes):
    231 	movl	%eax, -7(%edx)
    232 L(write_3bytes):
    233 	movw	%ax, -3(%edx)
    234 	movb	%al, -1(%edx)
    235 	SETRTNVAL
    236 	RETURN
    237 
    238 	ALIGN(4)
    239 /* ECX >= 32; the alignment of EDX has not been checked yet.  */
    240 L(32bytesormore):
    241 	/* Fill xmm0 with the pattern.  */
    242 	movd	%eax, %xmm0
    243 	pshufd	$0, %xmm0, %xmm0
    244 	testl	$0xf, %edx
    245 	jz	L(aligned_16)
    246 /* ECX >= 32 and EDX is not 16 byte aligned.  */
    247 L(not_aligned_16):
        	/* Cover the unaligned head with one unaligned 16-byte store, then
        	   round EDX up to the next 16-byte boundary.  EAX temporarily
        	   holds old EDX - new EDX (minus the number of bytes skipped),
        	   which is added to ECX to shrink the remaining count.  The
        	   bytes between the new EDX and old EDX + 16 are written again
        	   later with the same value.  EAX is then reloaded with the
        	   pattern from XMM0.  */
    248 	movdqu	%xmm0, (%edx)
    249 	movl	%edx, %eax
    250 	and	$-16, %edx
    251 	add	$16, %edx
    252 	sub	%edx, %eax
    253 	add	%eax, %ecx
    254 	movd	%xmm0, %eax
    255 
    256 	ALIGN(4)
        /* EDX is 16-byte aligned from here on.  */
    257 L(aligned_16):
    258 	cmp	$128, %ecx
    259 	jae	L(128bytesormore)
    260 
        /* Fewer than 128 bytes left: ECX indexes the 0..127 tail table.  */
    261 L(aligned_16_less128bytes):
    262 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    263 
    264 	ALIGN(4)
        /* ECX >= 128 and EDX is 16-byte aligned.  Choose a strategy by size:
             ECX >= SHARED_CACHE_SIZE -> L(128bytesormore_nt_start)
             ECX >= DATA_CACHE_SIZE   -> L(128bytes_L2_normal)
             otherwise                -> the loop below.
           The two thresholds are not defined in this file; presumably they
           come from cache.h.  EBX is pushed a second time here, on top of
           the push done by ENTRANCE, and popped again unless the
           non-temporal path is taken.  */
    265 L(128bytesormore):
    266 	PUSH(%ebx)
    267 	mov	$SHARED_CACHE_SIZE, %ebx
    268 	cmp	%ebx, %ecx
    269 	jae	L(128bytesormore_nt_start)
    270 
    271 
    272 	POP(%ebx)
        /* Emitted later, in front of L(128bytesormore_nt_start), to tell the
           unwinder that EBX is still pushed on that path.  */
    273 # define RESTORE_EBX_STATE CFI_PUSH(%ebx)
    274 	cmp	$DATA_CACHE_SIZE, %ecx
    275 
    276 	jae	L(128bytes_L2_normal)
        	/* Inside the loop ECX is kept 128 below the true remaining count,
        	   so the carry produced by each sub tells whether fewer than 128
        	   bytes will remain after the block that is stored next.  EDX is
        	   advanced with lea, which leaves the flags from sub intact for
        	   the conditional jump.  The loop body is unrolled twice.  */
    277 	subl	$128, %ecx
    278 L(128bytesormore_normal):
    279 	sub	$128, %ecx
    280 	movdqa	%xmm0, (%edx)
    281 	movdqa	%xmm0, 0x10(%edx)
    282 	movdqa	%xmm0, 0x20(%edx)
    283 	movdqa	%xmm0, 0x30(%edx)
    284 	movdqa	%xmm0, 0x40(%edx)
    285 	movdqa	%xmm0, 0x50(%edx)
    286 	movdqa	%xmm0, 0x60(%edx)
    287 	movdqa	%xmm0, 0x70(%edx)
    288 	lea	128(%edx), %edx
    289 	jb	L(128bytesless_normal)
    290 
    291 
    292 	sub	$128, %ecx
    293 	movdqa	%xmm0, (%edx)
    294 	movdqa	%xmm0, 0x10(%edx)
    295 	movdqa	%xmm0, 0x20(%edx)
    296 	movdqa	%xmm0, 0x30(%edx)
    297 	movdqa	%xmm0, 0x40(%edx)
    298 	movdqa	%xmm0, 0x50(%edx)
    299 	movdqa	%xmm0, 0x60(%edx)
    300 	movdqa	%xmm0, 0x70(%edx)
    301 	lea	128(%edx), %edx
    302 	jae	L(128bytesormore_normal)
    303 
        /* Undo the last subtraction so ECX is the true remainder (0..127),
           then dispatch on it.  */
    304 L(128bytesless_normal):
    305 	add	$128, %ecx
    306 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    307 
    308 	ALIGN(4)
        /* Loop used when DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE.  Each
           iteration issues two prefetcht0 hints for the 128 bytes that start
           0x380 bytes ahead of EDX, stores one 128-byte block with aligned
           stores and repeats while at least 128 bytes remain.  ECX holds the
           true remaining count here (no bias).  */
    309 L(128bytes_L2_normal):
    310 	prefetcht0	0x380(%edx)
    311 	prefetcht0	0x3c0(%edx)
    312 	sub	$128, %ecx
    313 	movdqa	%xmm0, (%edx)
    314 	movaps	%xmm0, 0x10(%edx)
    315 	movaps	%xmm0, 0x20(%edx)
    316 	movaps	%xmm0, 0x30(%edx)
    317 	movaps	%xmm0, 0x40(%edx)
    318 	movaps	%xmm0, 0x50(%edx)
    319 	movaps	%xmm0, 0x60(%edx)
    320 	movaps	%xmm0, 0x70(%edx)
    321 	add	$128, %edx
    322 	cmp	$128, %ecx
    323 	jae	L(128bytes_L2_normal)
    324 
        /* 0..127 bytes left.  */
    325 L(128bytesless_L2_normal):
    326 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    327 
    328 	RESTORE_EBX_STATE
        /* Path for ECX >= SHARED_CACHE_SIZE.  Reached only by the jump from
           L(128bytesormore), with EBX pushed twice and holding
           SHARED_CACHE_SIZE; RESTORE_EBX_STATE re-declares the second push
           for the unwinder.
           Stage 1 writes SHARED_CACHE_SIZE rounded down to a multiple of 128
           bytes with ordinary aligned stores, using EBX as the counter.
           ECX is first reduced to the number of bytes left after stage 1:
           LEN - SHARED_CACHE_SIZE + (SHARED_CACHE_SIZE & 0x7f).  EAX is used
           as scratch for that and then reloaded with the pattern.
           Stage 2 writes the rest in 128-byte blocks with non-temporal
           stores (movntdq), followed by an sfence.
           Finally the extra EBX push is undone and the last 0..127 bytes are
           written through the tail table.  */
    329 L(128bytesormore_nt_start):
    330 	sub	%ebx, %ecx
    331 	mov	%ebx, %eax
    332 	and	$0x7f, %eax
    333 	add	%eax, %ecx
    334 	movd	%xmm0, %eax
    335 	ALIGN(4)
    336 L(128bytesormore_shared_cache_loop):
    337 	prefetcht0	0x3c0(%edx)
    338 	prefetcht0	0x380(%edx)
    339 	sub	$0x80, %ebx
    340 	movdqa	%xmm0, (%edx)
    341 	movdqa	%xmm0, 0x10(%edx)
    342 	movdqa	%xmm0, 0x20(%edx)
    343 	movdqa	%xmm0, 0x30(%edx)
    344 	movdqa	%xmm0, 0x40(%edx)
    345 	movdqa	%xmm0, 0x50(%edx)
    346 	movdqa	%xmm0, 0x60(%edx)
    347 	movdqa	%xmm0, 0x70(%edx)
    348 	add	$0x80, %edx
    349 	cmp	$0x80, %ebx
    350 	jae	L(128bytesormore_shared_cache_loop)
        	/* Skip stage 2 when fewer than 128 bytes are left.  */
    351 	cmp	$0x80, %ecx
    352 	jb	L(shared_cache_loop_end)
    353 	ALIGN(4)
    354 L(128bytesormore_nt):
    355 	sub	$0x80, %ecx
    356 	movntdq	%xmm0, (%edx)
    357 	movntdq	%xmm0, 0x10(%edx)
    358 	movntdq	%xmm0, 0x20(%edx)
    359 	movntdq	%xmm0, 0x30(%edx)
    360 	movntdq	%xmm0, 0x40(%edx)
    361 	movntdq	%xmm0, 0x50(%edx)
    362 	movntdq	%xmm0, 0x60(%edx)
    363 	movntdq	%xmm0, 0x70(%edx)
    364 	add	$0x80, %edx
    365 	cmp	$0x80, %ecx
    366 	jae	L(128bytesormore_nt)
    367 	sfence
    368 L(shared_cache_loop_end):
    369 	POP(%ebx)
    370 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    371 
    372 
    373 	.pushsection .rodata.sse2,"a",@progbits
        /* Dispatch table for a remaining length of 0..127 bytes when the
           address of the first remaining byte is 16-byte aligned.  Entry N
           holds the offset of L(aligned_16_Nbytes) from the start of the
           table and is selected by BRANCH_TO_JMPTBL_ENTRY with ECX = N.  */
    374 	ALIGN(2)
    375 L(table_16_128bytes):
    376 	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
    377 	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
    378 	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
    379 	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
    380 	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
    381 	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
    382 	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
    383 	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
    384 	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
    385 	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
    386 	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
    387 	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
    388 	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
    389 	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
    390 	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
    391 	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
    392 	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
    393 	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
    394 	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
    395 	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
    396 	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
    397 	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
    398 	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
    399 	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
    400 	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
    401 	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
    402 	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
    403 	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
    404 	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
    405 	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
    406 	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
    407 	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
    408 	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
    409 	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
    410 	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
    411 	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
    412 	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
    413 	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
    414 	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
    415 	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
    416 	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
    417 	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
    418 	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
    419 	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
    420 	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
    421 	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
    422 	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
    423 	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
    424 	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
    425 	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
    426 	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
    427 	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
    428 	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
    429 	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
    430 	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
    431 	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
    432 	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
    433 	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
    434 	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
    435 	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
    436 	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
    437 	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
    438 	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
    439 	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
    440 	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
    441 	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
    442 	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
    443 	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
    444 	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
    445 	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
    446 	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
    447 	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
    448 	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
    449 	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
    450 	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
    451 	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
    452 	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
    453 	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
    454 	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
    455 	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
    456 	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
    457 	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
    458 	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
    459 	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
    460 	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
    461 	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
    462 	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
    463 	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
    464 	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
    465 	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
    466 	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
    467 	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
    468 	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
    469 	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
    470 	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
    471 	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
    472 	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
    473 	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
    474 	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
    475 	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
    476 	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
    477 	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
    478 	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
    479 	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
    480 	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
    481 	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
    482 	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
    483 	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
    484 	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
    485 	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
    486 	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
    487 	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
    488 	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
    489 	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
    490 	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
    491 	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
    492 	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
    493 	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
    494 	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
    495 	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
    496 	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
    497 	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
    498 	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
    499 	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
    500 	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
    501 	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
    502 	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
    503 	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
    504 	.popsection
    505 
    506 	ALIGN(4)
        /* Tails for 112, 96, ..., 16, 0 remaining bytes.  EDX points one past
           the last byte to write and XMM0 holds the 16-byte pattern.  Each
           label stores 16 bytes with an aligned store and falls through to
           the next shorter length; the store addresses are 16-byte aligned
           because the first remaining byte was.  */
    507 L(aligned_16_112bytes):
    508 	movdqa	%xmm0, -112(%edx)
    509 L(aligned_16_96bytes):
    510 	movdqa	%xmm0, -96(%edx)
    511 L(aligned_16_80bytes):
    512 	movdqa	%xmm0, -80(%edx)
    513 L(aligned_16_64bytes):
    514 	movdqa	%xmm0, -64(%edx)
    515 L(aligned_16_48bytes):
    516 	movdqa	%xmm0, -48(%edx)
    517 L(aligned_16_32bytes):
    518 	movdqa	%xmm0, -32(%edx)
    519 L(aligned_16_16bytes):
    520 	movdqa	%xmm0, -16(%edx)
    521 L(aligned_16_0bytes):
    522 	SETRTNVAL
    523 	RETURN
    524 
    525 	ALIGN(4)
        /* Tails for 113, 97, ..., 17, 1 remaining bytes: aligned 16-byte
           stores falling through, then one byte store.  EDX points one past
           the last byte to write.  */
    526 L(aligned_16_113bytes):
    527 	movdqa	%xmm0, -113(%edx)
    528 L(aligned_16_97bytes):
    529 	movdqa	%xmm0, -97(%edx)
    530 L(aligned_16_81bytes):
    531 	movdqa	%xmm0, -81(%edx)
    532 L(aligned_16_65bytes):
    533 	movdqa	%xmm0, -65(%edx)
    534 L(aligned_16_49bytes):
    535 	movdqa	%xmm0, -49(%edx)
    536 L(aligned_16_33bytes):
    537 	movdqa	%xmm0, -33(%edx)
    538 L(aligned_16_17bytes):
    539 	movdqa	%xmm0, -17(%edx)
    540 L(aligned_16_1bytes):
    541 	movb	%al, -1(%edx)
    542 	SETRTNVAL
    543 	RETURN
    544 
    545 	ALIGN(4)
        /* Tails for 114, 98, ..., 18, 2 remaining bytes: aligned 16-byte
           stores falling through, then one word store.  EDX points one past
           the last byte to write.  */
    546 L(aligned_16_114bytes):
    547 	movdqa	%xmm0, -114(%edx)
    548 L(aligned_16_98bytes):
    549 	movdqa	%xmm0, -98(%edx)
    550 L(aligned_16_82bytes):
    551 	movdqa	%xmm0, -82(%edx)
    552 L(aligned_16_66bytes):
    553 	movdqa	%xmm0, -66(%edx)
    554 L(aligned_16_50bytes):
    555 	movdqa	%xmm0, -50(%edx)
    556 L(aligned_16_34bytes):
    557 	movdqa	%xmm0, -34(%edx)
    558 L(aligned_16_18bytes):
    559 	movdqa	%xmm0, -18(%edx)
    560 L(aligned_16_2bytes):
    561 	movw	%ax, -2(%edx)
    562 	SETRTNVAL
    563 	RETURN
    564 
    565 	ALIGN(4)
        /* Tails for 115, 99, ..., 19, 3 remaining bytes: aligned 16-byte
           stores falling through, then a word store and a byte store.  EDX
           points one past the last byte to write.  */
    566 L(aligned_16_115bytes):
    567 	movdqa	%xmm0, -115(%edx)
    568 L(aligned_16_99bytes):
    569 	movdqa	%xmm0, -99(%edx)
    570 L(aligned_16_83bytes):
    571 	movdqa	%xmm0, -83(%edx)
    572 L(aligned_16_67bytes):
    573 	movdqa	%xmm0, -67(%edx)
    574 L(aligned_16_51bytes):
    575 	movdqa	%xmm0, -51(%edx)
    576 L(aligned_16_35bytes):
    577 	movdqa	%xmm0, -35(%edx)
    578 L(aligned_16_19bytes):
    579 	movdqa	%xmm0, -19(%edx)
    580 L(aligned_16_3bytes):
    581 	movw	%ax, -3(%edx)
    582 	movb	%al, -1(%edx)
    583 	SETRTNVAL
    584 	RETURN
    585 
    586 	ALIGN(4)
        /* Tails for 116, 100, ..., 20, 4 remaining bytes: aligned 16-byte
           stores falling through, then one dword store.  EDX points one past
           the last byte to write.  */
    587 L(aligned_16_116bytes):
    588 	movdqa	%xmm0, -116(%edx)
    589 L(aligned_16_100bytes):
    590 	movdqa	%xmm0, -100(%edx)
    591 L(aligned_16_84bytes):
    592 	movdqa	%xmm0, -84(%edx)
    593 L(aligned_16_68bytes):
    594 	movdqa	%xmm0, -68(%edx)
    595 L(aligned_16_52bytes):
    596 	movdqa	%xmm0, -52(%edx)
    597 L(aligned_16_36bytes):
    598 	movdqa	%xmm0, -36(%edx)
    599 L(aligned_16_20bytes):
    600 	movdqa	%xmm0, -20(%edx)
    601 L(aligned_16_4bytes):
    602 	movl	%eax, -4(%edx)
    603 	SETRTNVAL
    604 	RETURN
    605 
    606 	ALIGN(4)
        /* Tails for 117, 101, ..., 21, 5 remaining bytes: aligned 16-byte
           stores falling through, then a dword store and a byte store.  EDX
           points one past the last byte to write.  */
    607 L(aligned_16_117bytes):
    608 	movdqa	%xmm0, -117(%edx)
    609 L(aligned_16_101bytes):
    610 	movdqa	%xmm0, -101(%edx)
    611 L(aligned_16_85bytes):
    612 	movdqa	%xmm0, -85(%edx)
    613 L(aligned_16_69bytes):
    614 	movdqa	%xmm0, -69(%edx)
    615 L(aligned_16_53bytes):
    616 	movdqa	%xmm0, -53(%edx)
    617 L(aligned_16_37bytes):
    618 	movdqa	%xmm0, -37(%edx)
    619 L(aligned_16_21bytes):
    620 	movdqa	%xmm0, -21(%edx)
    621 L(aligned_16_5bytes):
    622 	movl	%eax, -5(%edx)
    623 	movb	%al, -1(%edx)
    624 	SETRTNVAL
    625 	RETURN
    626 
    627 	ALIGN(4)
        /* Tails for 118, 102, ..., 22, 6 remaining bytes: aligned 16-byte
           stores falling through, then a dword store and a word store.  EDX
           points one past the last byte to write.  */
    628 L(aligned_16_118bytes):
    629 	movdqa	%xmm0, -118(%edx)
    630 L(aligned_16_102bytes):
    631 	movdqa	%xmm0, -102(%edx)
    632 L(aligned_16_86bytes):
    633 	movdqa	%xmm0, -86(%edx)
    634 L(aligned_16_70bytes):
    635 	movdqa	%xmm0, -70(%edx)
    636 L(aligned_16_54bytes):
    637 	movdqa	%xmm0, -54(%edx)
    638 L(aligned_16_38bytes):
    639 	movdqa	%xmm0, -38(%edx)
    640 L(aligned_16_22bytes):
    641 	movdqa	%xmm0, -22(%edx)
    642 L(aligned_16_6bytes):
    643 	movl	%eax, -6(%edx)
    644 	movw	%ax, -2(%edx)
    645 	SETRTNVAL
    646 	RETURN
    647 
    648 	ALIGN(4)
        /* Tails for 119, 103, ..., 23, 7 remaining bytes: aligned 16-byte
           stores falling through, then dword, word and byte stores.  EDX
           points one past the last byte to write.  */
    649 L(aligned_16_119bytes):
    650 	movdqa	%xmm0, -119(%edx)
    651 L(aligned_16_103bytes):
    652 	movdqa	%xmm0, -103(%edx)
    653 L(aligned_16_87bytes):
    654 	movdqa	%xmm0, -87(%edx)
    655 L(aligned_16_71bytes):
    656 	movdqa	%xmm0, -71(%edx)
    657 L(aligned_16_55bytes):
    658 	movdqa	%xmm0, -55(%edx)
    659 L(aligned_16_39bytes):
    660 	movdqa	%xmm0, -39(%edx)
    661 L(aligned_16_23bytes):
    662 	movdqa	%xmm0, -23(%edx)
    663 L(aligned_16_7bytes):
    664 	movl	%eax, -7(%edx)
    665 	movw	%ax, -3(%edx)
    666 	movb	%al, -1(%edx)
    667 	SETRTNVAL
    668 	RETURN
    669 
    670 	ALIGN(4)
        /* Tails for 120, 104, ..., 24, 8 remaining bytes: aligned 16-byte
           stores falling through, then one 8-byte store of the low half of
           XMM0.  EDX points one past the last byte to write.  */
    671 L(aligned_16_120bytes):
    672 	movdqa	%xmm0, -120(%edx)
    673 L(aligned_16_104bytes):
    674 	movdqa	%xmm0, -104(%edx)
    675 L(aligned_16_88bytes):
    676 	movdqa	%xmm0, -88(%edx)
    677 L(aligned_16_72bytes):
    678 	movdqa	%xmm0, -72(%edx)
    679 L(aligned_16_56bytes):
    680 	movdqa	%xmm0, -56(%edx)
    681 L(aligned_16_40bytes):
    682 	movdqa	%xmm0, -40(%edx)
    683 L(aligned_16_24bytes):
    684 	movdqa	%xmm0, -24(%edx)
    685 L(aligned_16_8bytes):
    686 	movq	%xmm0, -8(%edx)
    687 	SETRTNVAL
    688 	RETURN
    689 
    690 	ALIGN(4)
        /* Tails for 121, 105, ..., 25, 9 remaining bytes: aligned 16-byte
           stores falling through, then an 8-byte store and a byte store.
           EDX points one past the last byte to write.  */
    691 L(aligned_16_121bytes):
    692 	movdqa	%xmm0, -121(%edx)
    693 L(aligned_16_105bytes):
    694 	movdqa	%xmm0, -105(%edx)
    695 L(aligned_16_89bytes):
    696 	movdqa	%xmm0, -89(%edx)
    697 L(aligned_16_73bytes):
    698 	movdqa	%xmm0, -73(%edx)
    699 L(aligned_16_57bytes):
    700 	movdqa	%xmm0, -57(%edx)
    701 L(aligned_16_41bytes):
    702 	movdqa	%xmm0, -41(%edx)
    703 L(aligned_16_25bytes):
    704 	movdqa	%xmm0, -25(%edx)
    705 L(aligned_16_9bytes):
    706 	movq	%xmm0, -9(%edx)
    707 	movb	%al, -1(%edx)
    708 	SETRTNVAL
    709 	RETURN
    710 
    711 	ALIGN(4)
        /* Tails for 122, 106, ..., 26, 10 remaining bytes: aligned 16-byte
           stores falling through, then an 8-byte store and a word store.
           EDX points one past the last byte to write.  */
    712 L(aligned_16_122bytes):
    713 	movdqa	%xmm0, -122(%edx)
    714 L(aligned_16_106bytes):
    715 	movdqa	%xmm0, -106(%edx)
    716 L(aligned_16_90bytes):
    717 	movdqa	%xmm0, -90(%edx)
    718 L(aligned_16_74bytes):
    719 	movdqa	%xmm0, -74(%edx)
    720 L(aligned_16_58bytes):
    721 	movdqa	%xmm0, -58(%edx)
    722 L(aligned_16_42bytes):
    723 	movdqa	%xmm0, -42(%edx)
    724 L(aligned_16_26bytes):
    725 	movdqa	%xmm0, -26(%edx)
    726 L(aligned_16_10bytes):
    727 	movq	%xmm0, -10(%edx)
    728 	movw	%ax, -2(%edx)
    729 	SETRTNVAL
    730 	RETURN
    731 
    732 	ALIGN(4)
        /* Tails for 123, 107, ..., 27, 11 remaining bytes: aligned 16-byte
           stores falling through, then 8-byte, word and byte stores.  EDX
           points one past the last byte to write.  */
    733 L(aligned_16_123bytes):
    734 	movdqa	%xmm0, -123(%edx)
    735 L(aligned_16_107bytes):
    736 	movdqa	%xmm0, -107(%edx)
    737 L(aligned_16_91bytes):
    738 	movdqa	%xmm0, -91(%edx)
    739 L(aligned_16_75bytes):
    740 	movdqa	%xmm0, -75(%edx)
    741 L(aligned_16_59bytes):
    742 	movdqa	%xmm0, -59(%edx)
    743 L(aligned_16_43bytes):
    744 	movdqa	%xmm0, -43(%edx)
    745 L(aligned_16_27bytes):
    746 	movdqa	%xmm0, -27(%edx)
    747 L(aligned_16_11bytes):
    748 	movq	%xmm0, -11(%edx)
    749 	movw	%ax, -3(%edx)
    750 	movb	%al, -1(%edx)
    751 	SETRTNVAL
    752 	RETURN
    753 
    754 	ALIGN(4)
        /* Tails for 124, 108, ..., 28, 12 remaining bytes: aligned 16-byte
           stores falling through, then an 8-byte store and a dword store.
           EDX points one past the last byte to write.  */
    755 L(aligned_16_124bytes):
    756 	movdqa	%xmm0, -124(%edx)
    757 L(aligned_16_108bytes):
    758 	movdqa	%xmm0, -108(%edx)
    759 L(aligned_16_92bytes):
    760 	movdqa	%xmm0, -92(%edx)
    761 L(aligned_16_76bytes):
    762 	movdqa	%xmm0, -76(%edx)
    763 L(aligned_16_60bytes):
    764 	movdqa	%xmm0, -60(%edx)
    765 L(aligned_16_44bytes):
    766 	movdqa	%xmm0, -44(%edx)
    767 L(aligned_16_28bytes):
    768 	movdqa	%xmm0, -28(%edx)
    769 L(aligned_16_12bytes):
    770 	movq	%xmm0, -12(%edx)
    771 	movl	%eax, -4(%edx)
    772 	SETRTNVAL
    773 	RETURN
    774 
    775 	ALIGN(4)
        /* Tails for 125, 109, ..., 29, 13 remaining bytes: aligned 16-byte
           stores falling through, then 8-byte, dword and byte stores.  EDX
           points one past the last byte to write.  */
    776 L(aligned_16_125bytes):
    777 	movdqa	%xmm0, -125(%edx)
    778 L(aligned_16_109bytes):
    779 	movdqa	%xmm0, -109(%edx)
    780 L(aligned_16_93bytes):
    781 	movdqa	%xmm0, -93(%edx)
    782 L(aligned_16_77bytes):
    783 	movdqa	%xmm0, -77(%edx)
    784 L(aligned_16_61bytes):
    785 	movdqa	%xmm0, -61(%edx)
    786 L(aligned_16_45bytes):
    787 	movdqa	%xmm0, -45(%edx)
    788 L(aligned_16_29bytes):
    789 	movdqa	%xmm0, -29(%edx)
    790 L(aligned_16_13bytes):
    791 	movq	%xmm0, -13(%edx)
    792 	movl	%eax, -5(%edx)
    793 	movb	%al, -1(%edx)
    794 	SETRTNVAL
    795 	RETURN
    796 
    797 	ALIGN(4)
        /* Tails for 126, 110, ..., 30, 14 remaining bytes: aligned 16-byte
           stores falling through, then 8-byte, dword and word stores.  EDX
           points one past the last byte to write.  */
    798 L(aligned_16_126bytes):
    799 	movdqa	%xmm0, -126(%edx)
    800 L(aligned_16_110bytes):
    801 	movdqa	%xmm0, -110(%edx)
    802 L(aligned_16_94bytes):
    803 	movdqa	%xmm0, -94(%edx)
    804 L(aligned_16_78bytes):
    805 	movdqa	%xmm0, -78(%edx)
    806 L(aligned_16_62bytes):
    807 	movdqa	%xmm0, -62(%edx)
    808 L(aligned_16_46bytes):
    809 	movdqa	%xmm0, -46(%edx)
    810 L(aligned_16_30bytes):
    811 	movdqa	%xmm0, -30(%edx)
    812 L(aligned_16_14bytes):
    813 	movq	%xmm0, -14(%edx)
    814 	movl	%eax, -6(%edx)
    815 	movw	%ax, -2(%edx)
    816 	SETRTNVAL
    817 	RETURN
    818 
    819 	ALIGN(4)
        /* Tails for 127, 111, ..., 31, 15 remaining bytes: aligned 16-byte
           stores falling through, then 8-byte, dword, word and byte stores.
           EDX points one past the last byte to write.  This is the last code
           of the function, so it ends with RETURN_END, which does not
           re-declare EBX as pushed after the ret.  */
    820 L(aligned_16_127bytes):
    821 	movdqa	%xmm0, -127(%edx)
    822 L(aligned_16_111bytes):
    823 	movdqa	%xmm0, -111(%edx)
    824 L(aligned_16_95bytes):
    825 	movdqa	%xmm0, -95(%edx)
    826 L(aligned_16_79bytes):
    827 	movdqa	%xmm0, -79(%edx)
    828 L(aligned_16_63bytes):
    829 	movdqa	%xmm0, -63(%edx)
    830 L(aligned_16_47bytes):
    831 	movdqa	%xmm0, -47(%edx)
    832 L(aligned_16_31bytes):
    833 	movdqa	%xmm0, -31(%edx)
    834 L(aligned_16_15bytes):
    835 	movq	%xmm0, -15(%edx)
    836 	movl	%eax, -7(%edx)
    837 	movw	%ax, -3(%edx)
    838 	movb	%al, -1(%edx)
    839 	SETRTNVAL
    840 	RETURN_END
    841 
    842 END(memset)
    843