Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #include <private/bionic_asm.h>
     32 
     33 #include "cache.h"
     34 
     35 #ifndef L
        /* L(label): builds an assembler-local label name by pasting the .L
           prefix onto LABEL.  */
     36 # define L(label)	.L##label
     37 #endif
     38 
     39 #ifndef ALIGN
        /* ALIGN(n): emit .p2align n, i.e. align to a 2^n-byte boundary.  */
     40 # define ALIGN(n)	.p2align n
     41 #endif
     42 
        /* CFI_PUSH(REG): unwind annotation for a 4-byte push of REG.  The CFA
           offset grows by 4 and REG is recorded as saved at offset 0 from the
           current stack pointer.  */
     43 #define CFI_PUSH(REG)						\
     44   .cfi_adjust_cfa_offset 4;					\
     45   .cfi_rel_offset REG, 0
     46 
        /* CFI_POP(REG): unwind annotation for a 4-byte pop of REG.  The CFA
           offset shrinks by 4 and REG goes back to its rule from function
           entry, so any earlier CFI_PUSH record for REG is dropped as well.  */
     47 #define CFI_POP(REG)						\
     48   .cfi_adjust_cfa_offset -4;					\
     49   .cfi_restore REG
     50 
        /* PUSH/POP: the instruction together with its unwind annotation.  */
     51 #define PUSH(REG)	pushl REG; CFI_PUSH(REG)
     52 #define POP(REG)	popl REG; CFI_POP(REG)
     53 
        /* Stack-argument offsets from %esp, valid while exactly one word
           (the %ebx pushed by ENTRANCE) sits above the return address:
           4 bytes of return address + 4 bytes of saved %ebx = 8.  The
           arguments follow in consecutive 4-byte slots.  CHK_DST_LEN is read
           only by __memset_chk_generic.  */
     54 #define PARMS 8 /* Preserve EBX. */
     55 #define DST PARMS
     56 #define CHR (DST+4)
     57 #define LEN (CHR+4)
     58 #define CHK_DST_LEN (LEN+4)
        /* SETRTNVAL: load the DST argument into %eax as the return value.  */
     59 #define SETRTNVAL	movl DST(%esp), %eax
     60 
        /* ENTRANCE:   save %ebx on entry.
           RETURN_END: restore %ebx and return; used for the last return
                       before END.
           RETURN:     RETURN_END followed by CFI_PUSH(%ebx).  The code placed
                       after the ret is reached by branches from paths where
                       %ebx is still on the stack, so the unwind state is
                       switched back to "one word pushed" for it.
           JMPTBL(I, B): offset of label I relative to label B.  */
     61 # define ENTRANCE	PUSH(%ebx);
     62 # define RETURN_END	POP(%ebx); ret
     63 # define RETURN		RETURN_END; CFI_PUSH(%ebx)
     64 # define JMPTBL(I, B)	I - B
     65 
        /* BRANCH_TO_JMPTBL_ENTRY(TABLE)
           In:       %ecx = index into TABLE (entries are 4 bytes wide),
                     %edx = current store pointer.
           Effect:   %edx += %ecx, then jumps to the handler selected by
                     TABLE[%ecx].
           Clobbers: %ebx and the flags.  The caller must already have %ebx
                     saved, since the handlers restore it via RETURN.  */
     66 /* Load an entry in a jump table into EBX and branch to it.  TABLE is a
     67    jump table with relative offsets.   */
     68 # define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
     69     /* We first load PC into EBX.  */				\
     70     call	__x86.get_pc_thunk.bx;				\
     71     /* Get the address of the jump table.  */			\
     72     add		$(TABLE - .), %ebx;				\
     73     /* Get the entry and convert the relative offset to the	\
     74        absolute address.  */					\
     75     add		(%ebx,%ecx,4), %ebx;				\
     76     add		%ecx, %edx;					\
     77     /* We loaded the jump table and adjusted EDX. Go.  */	\
     78     jmp		*%ebx
     79 
     80 	.section	.gnu.linkonce.t.__x86.get_pc_thunk.bx,"ax",@progbits
        	/* PC thunk: returns in %ebx the address of the instruction that
        	   follows the call to it, by reading the return address from the
        	   top of the stack.  BRANCH_TO_JMPTBL_ENTRY uses it to form a
        	   position-independent jump-table address.  The symbol is global
        	   but hidden and lives in its own linkonce section, presumably so
        	   that identical copies from other objects are merged at link
        	   time (TODO confirm).  */
     81 	.globl	__x86.get_pc_thunk.bx
     82 	.hidden	__x86.get_pc_thunk.bx
     83 	ALIGN(4)
     84 	.type	__x86.get_pc_thunk.bx,@function
     85 __x86.get_pc_thunk.bx:
     86 	movl	(%esp), %ebx	/* %ebx = return address = caller's next PC */
     87 	ret
     88 
     89 ENTRY(__memset_chk_generic)
          /* Checked entry point.  Takes the same three stack arguments as
             memset_generic plus a fourth one, CHK_DST_LEN (presumably the
             known size of the destination object, TODO confirm with callers).
             If LEN <= CHK_DST_LEN (unsigned compare) it joins memset_generic
             at L(memset_length_loaded) with %ebx already pushed and
             %ecx = LEN.  Otherwise it undoes the push and tail-jumps to
             __memset_chk_fail, which is defined outside this file.  */
     90   ENTRANCE
     91 
     92   movl LEN(%esp), %ecx
     93   cmpl CHK_DST_LEN(%esp), %ecx
     94   jna L(memset_length_loaded)
     95 
     96   POP(%ebx) // Undo ENTRANCE without returning.
     97   jmp __memset_chk_fail
     98 END(__memset_chk_generic)
     99 
    100 	.section .text.sse2,"ax",@progbits
        	/* memset_generic: stack arguments DST, CHR, LEN (see the offset
        	   macros); returns DST in %eax.
        	   Register roles from here on:
        	     %ecx = byte count still to be stored
        	     %eax = fill byte replicated into all four bytes
        	     %edx = store pointer
        	     %ebx = saved by ENTRANCE, restored by every RETURN
        	   This part handles counts 0..15 and hands larger counts to
        	   L(16bytesormore).  */
    101 	ALIGN(4)
    102 ENTRY(memset_generic)
    103 	ENTRANCE
    104 
    105 	movl	LEN(%esp), %ecx
        /* __memset_chk_generic joins here with %ebx pushed and %ecx = LEN.  */
    106 L(memset_length_loaded):
    107 	cmp	$0, %ecx
    108 	ja	L(1byteormore)	/* unsigned: taken for any non-zero count */
        	/* Zero count: store nothing, just return DST.  */
    109 	SETRTNVAL
    110 	RETURN
    111 
    112 L(1byteormore):
    113 	movzbl	CHR(%esp), %eax	/* only the low byte of CHR is used */
    114 	movb	%al, %ah
    115 	/* Fill the whole EAX with pattern.  */
    116 	movl	%eax, %edx
    117 	shl	 $16, %eax
    118 	or	%edx, %eax
    119 	movl	DST(%esp), %edx
    120 	cmp	$1, %ecx
    121 	je	L(1byte)
    122 	cmp	$16, %ecx
    123 	jae	L(16bytesormore)
    124 
        	/* 2..15 bytes.  Stores are anchored at both ends of the buffer
        	   and may overlap in the middle, so no loop is needed.  */
    125 	cmp	$4, %ecx
    126 	jb	L(4bytesless)
        	/* 4..15 bytes: first and last dword.  */
    127 	movl	%eax, (%edx)
    128 	movl	%eax, -4(%edx, %ecx)
    129 	cmp	$8, %ecx
    130 	jb	L(8bytesless)
        	/* 8..15 bytes: also the second and second-to-last dword.  */
    131 	movl	%eax, 4(%edx)
    132 	movl	%eax, -8(%edx, %ecx)
    133 L(8bytesless):
    134 	SETRTNVAL
    135 	RETURN
    136 
        /* 2..3 bytes: first and last word.  */
    137 L(4bytesless):
    138 	movw	%ax, (%edx)
    139 	movw	%ax, -2(%edx, %ecx)
    140 	SETRTNVAL
    141 	RETURN
    142 
    143 L(1byte):
    144 	movb	%al, (%edx)
    145 	SETRTNVAL
    146 	RETURN
    147 
    148 	ALIGN(4)
        /* Counts >= 16.  In: %eax = 4-byte pattern, %edx = DST, %ecx = count.
           The pattern is broadcast into all 16 bytes of %xmm0 first.  */
    149 L(16bytesormore):
    150 	movd	%eax, %xmm0
    151 	pshufd	$0, %xmm0, %xmm0
    152 
    153 	cmp	$64, %ecx
    154 	ja	L(64bytesmore)
        	/* 16..64 bytes: unaligned 16-byte stores anchored at both ends of
        	   the buffer (two for <= 32 bytes, four otherwise).  They may
        	   overlap.  */
    155 	movdqu	%xmm0, (%edx)
    156 	movdqu	%xmm0, -16(%edx, %ecx)
    157 	cmp	$32, %ecx
    158 	jbe	L(32bytesless)
    159 	movdqu	%xmm0, 16(%edx)
    160 	movdqu	%xmm0, -32(%edx, %ecx)
    161 L(32bytesless):
    162 	SETRTNVAL
    163 	RETURN
    164 
        /* More than 64 bytes: make %edx 16-byte aligned so the rest can use
           aligned stores.  */
    165 L(64bytesmore):
    166 	testl	$0xf, %edx
    167 	jz	L(aligned_16)
    168 L(not_aligned_16):
        	/* Cover the unaligned head with one unaligned 16-byte store, then
        	   round %edx up to the next 16-byte boundary.  %eax is used as a
        	   temporary for (old %edx - new %edx), a value in -15..-1 that
        	   shrinks %ecx by the bytes skipped.  Afterwards the pattern is
        	   reloaded into %eax from %xmm0.  */
    169 	movdqu	%xmm0, (%edx)
    170 	movl	%edx, %eax
    171 	and	$-16, %edx
    172 	add	$16, %edx
    173 	sub	%edx, %eax
    174 	add	%eax, %ecx
    175 	movd	%xmm0, %eax
    176 
    177 	ALIGN(4)
        /* Here %edx is 16-byte aligned and %ecx is the remaining count.  */
    178 L(aligned_16):
    179 	cmp	$128, %ecx
    180 	jae	L(128bytesormore)
    181 
        /* Fewer than 128 bytes left: %ecx indexes the 128-entry jump table
           directly.  The macro also advances %edx by %ecx, so the handlers
           store backwards from the end of the buffer.  */
    182 L(aligned_16_less128bytes):
    183 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    184 
    185 	ALIGN(4)
        /* Bulk fill for counts >= 128.
           In:  %edx = 16-byte-aligned store pointer, %ecx = remaining count,
                %xmm0 = pattern in all 16 bytes, %eax = pattern in all 4 bytes.
           Out: every path ends in BRANCH_TO_JMPTBL_ENTRY with %ecx < 128 and
                %edx still 16-byte aligned.

           Strategy by size:
             count >= SHARED_CACHE_SIZE : cached stores for the first
                                          SHARED_CACHE_SIZE bytes, then
                                          non-temporal stores (nt path)
             count >= DATA_CACHE_SIZE   : prefetching loop (L2 path)
             otherwise                  : plain unrolled loop (normal path)

           %ebx is used as plain scratch and is not saved again here.
           ENTRANCE already pushed the caller's value, and every exit goes
           through BRANCH_TO_JMPTBL_ENTRY (which overwrites %ebx anyway) into
           a tail handler whose RETURN pops it.  Keeping the stack depth
           constant also keeps the unwind (CFI) state valid at every label in
           this region, whichever branch reaches it.  */
    186 L(128bytesormore):
    187 	/* %ebx = threshold for the non-temporal path, and its byte budget.  */
    188 	mov	$SHARED_CACHE_SIZE, %ebx
    189 	cmp	%ebx, %ecx
    190 	jae	L(128bytesormore_nt_start)
    191 
    192 	/* Below SHARED_CACHE_SIZE: choose between the plain loop and the  */
    193 
    194 	/* prefetching loop by comparing against DATA_CACHE_SIZE.  */
    195 	mov	$DATA_CACHE_SIZE, %ebx
    196 
    197 	cmp	%ebx, %ecx
    198 	jae	L(128bytes_L2_normal)
        	/* Normal path.  Inside the loop %ecx is biased by -128, so the
        	   borrow from each "sub $128" says whether another full 128-byte
        	   chunk remains after the one being stored.  The SSE stores and
        	   lea leave the flags untouched, so jb/jae below still see the
        	   result of that sub.  The loop body is unrolled twice.  */
    199 	subl	$128, %ecx
    200 L(128bytesormore_normal):
    201 	sub	$128, %ecx
    202 	movdqa	%xmm0, (%edx)
    203 	movaps	%xmm0, 0x10(%edx)
    204 	movaps	%xmm0, 0x20(%edx)
    205 	movaps	%xmm0, 0x30(%edx)
    206 	movaps	%xmm0, 0x40(%edx)
    207 	movaps	%xmm0, 0x50(%edx)
    208 	movaps	%xmm0, 0x60(%edx)
    209 	movaps	%xmm0, 0x70(%edx)
    210 	lea	128(%edx), %edx
    211 	jb	L(128bytesless_normal)
    212 
    213 
    214 	sub	$128, %ecx
    215 	movdqa	%xmm0, (%edx)
    216 	movaps	%xmm0, 0x10(%edx)
    217 	movaps	%xmm0, 0x20(%edx)
    218 	movaps	%xmm0, 0x30(%edx)
    219 	movaps	%xmm0, 0x40(%edx)
    220 	movaps	%xmm0, 0x50(%edx)
    221 	movaps	%xmm0, 0x60(%edx)
    222 	movaps	%xmm0, 0x70(%edx)
    223 	lea	128(%edx), %edx
    224 	jae	L(128bytesormore_normal)
    225 
    226 L(128bytesless_normal):
    227 	lea	128(%ecx), %ecx	/* remove the -128 bias: %ecx = bytes left */
    228 	/* %ecx < 128 here; the tail handler finishes the fill and returns.  */
    229 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    230 
    231 	ALIGN(4)
        /* L2 path: the same 128-byte chunk of aligned stores, with prefetchnta
           hints 0x380 and 0x3c0 bytes ahead of the store pointer.  %ecx is
           not biased in this loop.  */
    232 L(128bytes_L2_normal):
    233 	prefetchnta	0x380(%edx)
    234 	prefetchnta	0x3c0(%edx)
    235 	sub	$128, %ecx
    236 	movdqa	%xmm0, (%edx)
    237 	movaps	%xmm0, 0x10(%edx)
    238 	movaps	%xmm0, 0x20(%edx)
    239 	movaps	%xmm0, 0x30(%edx)
    240 	movaps	%xmm0, 0x40(%edx)
    241 	movaps	%xmm0, 0x50(%edx)
    242 	movaps	%xmm0, 0x60(%edx)
    243 	movaps	%xmm0, 0x70(%edx)
    244 	add	$128, %edx
    245 	cmp	$128, %ecx
    246 	jae	L(128bytes_L2_normal)
    247 
    248 L(128bytesless_L2_normal):
    249 	/* %ecx < 128 here; the tail handler finishes the fill and returns.  */
    250 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    251 
        /* nt path.  On entry %ebx = SHARED_CACHE_SIZE and %ecx = count.
           %ecx becomes the bytes left beyond the first SHARED_CACHE_SIZE, and
           %ebx counts down the bytes still to be written with cached stores.
           That first loop consumes %ebx in whole 128-byte chunks, so it
           covers exactly SHARED_CACHE_SIZE bytes only if that constant is a
           multiple of 128 (assumed; verify against cache.h).  */
    252 L(128bytesormore_nt_start):
    253 	sub	%ebx, %ecx
    254 	ALIGN(4)
    255 L(128bytesormore_shared_cache_loop):
    256 	prefetchnta	0x3c0(%edx)
    257 	prefetchnta	0x380(%edx)
    258 	sub	$0x80, %ebx
    259 	movdqa	%xmm0, (%edx)
    260 	movaps	%xmm0, 0x10(%edx)
    261 	movaps	%xmm0, 0x20(%edx)
    262 	movaps	%xmm0, 0x30(%edx)
    263 	movaps	%xmm0, 0x40(%edx)
    264 	movaps	%xmm0, 0x50(%edx)
    265 	movaps	%xmm0, 0x60(%edx)
    266 	movaps	%xmm0, 0x70(%edx)
    267 	add	$0x80, %edx
    268 	cmp	$0x80, %ebx
    269 	jae	L(128bytesormore_shared_cache_loop)
    270 	cmp	$0x80, %ecx
    271 	jb	L(shared_cache_loop_end)
    272 	ALIGN(4)
        /* Remaining full 128-byte chunks are written with non-temporal
           stores.  The sfence after the loop orders them before whatever
           follows.  */
    273 L(128bytesormore_nt):
    274 	sub	$0x80, %ecx
    275 	movntdq	%xmm0, (%edx)
    276 	movntdq	%xmm0, 0x10(%edx)
    277 	movntdq	%xmm0, 0x20(%edx)
    278 	movntdq	%xmm0, 0x30(%edx)
    279 	movntdq	%xmm0, 0x40(%edx)
    280 	movntdq	%xmm0, 0x50(%edx)
    281 	movntdq	%xmm0, 0x60(%edx)
    282 	movntdq	%xmm0, 0x70(%edx)
    283 	add	$0x80, %edx
    284 	cmp	$0x80, %ecx
    285 	jae	L(128bytesormore_nt)
    286 	sfence
    287 L(shared_cache_loop_end):
    288 	/* %ecx < 128 here; the tail handler finishes the fill and returns.  */
    289 	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
    290 
    291 
    292 	.pushsection .rodata.sse2,"a",@progbits
        	/* Jump table used by BRANCH_TO_JMPTBL_ENTRY.  Entry N (N = 0..127)
        	   is the 4-byte offset of L(aligned_16_Nbytes) relative to the
        	   start of the table; the macro adds the table address back to
        	   get the absolute target.  The table is indexed by the number of
        	   bytes still to be stored, so callers must dispatch only with
        	   %ecx < 128.  ALIGN(2) gives the table 4-byte alignment.  It is
        	   placed in .rodata.sse2, and .popsection switches back to the
        	   code section.  */
    293 	ALIGN(2)
    294 L(table_16_128bytes):
    295 	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
    296 	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
    297 	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
    298 	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
    299 	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
    300 	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
    301 	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
    302 	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
    303 	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
    304 	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
    305 	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
    306 	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
    307 	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
    308 	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
    309 	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
    310 	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
    311 	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
    312 	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
    313 	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
    314 	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
    315 	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
    316 	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
    317 	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
    318 	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
    319 	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
    320 	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
    321 	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
    322 	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
    323 	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
    324 	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
    325 	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
    326 	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
    327 	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
    328 	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
    329 	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
    330 	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
    331 	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
    332 	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
    333 	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
    334 	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
    335 	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
    336 	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
    337 	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
    338 	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
    339 	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
    340 	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
    341 	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
    342 	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
    343 	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
    344 	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
    345 	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
    346 	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
    347 	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
    348 	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
    349 	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
    350 	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
    351 	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
    352 	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
    353 	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
    354 	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
    355 	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
    356 	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
    357 	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
    358 	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
    359 	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
    360 	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
    361 	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
    362 	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
    363 	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
    364 	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
    365 	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
    366 	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
    367 	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
    368 	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
    369 	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
    370 	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
    371 	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
    372 	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
    373 	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
    374 	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
    375 	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
    376 	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
    377 	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
    378 	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
    379 	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
    380 	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
    381 	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
    382 	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
    383 	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
    384 	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
    385 	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
    386 	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
    387 	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
    388 	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
    389 	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
    390 	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
    391 	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
    392 	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
    393 	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
    394 	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
    395 	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
    396 	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
    397 	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
    398 	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
    399 	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
    400 	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
    401 	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
    402 	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
    403 	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
    404 	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
    405 	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
    406 	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
    407 	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
    408 	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
    409 	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
    410 	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
    411 	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
    412 	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
    413 	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
    414 	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
    415 	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
    416 	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
    417 	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
    418 	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
    419 	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
    420 	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
    421 	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
    422 	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
    423 	.popsection
    424 
    425 	ALIGN(4)
        /* Tail handlers, targets of L(table_16_128bytes), for remaining
           counts N with N mod 16 in 0..3.
           On entry BRANCH_TO_JMPTBL_ENTRY has already added N to %edx, so
           %edx points one past the last byte to fill and every store uses a
           negative offset from it.  %xmm0 holds the pattern in all 16 bytes
           and %eax in all 4.
           Entering at L(aligned_16_Nbytes) falls through one movdqa per full
           16 bytes of N, then the word/byte stores for the last N mod 16
           bytes, then returns DST.  The movdqa addresses are 16-byte aligned
           because (%edx - N) was aligned when the table was dispatched.  */
    426 L(aligned_16_112bytes):
    427 	movdqa	%xmm0, -112(%edx)
    428 L(aligned_16_96bytes):
    429 	movdqa	%xmm0, -96(%edx)
    430 L(aligned_16_80bytes):
    431 	movdqa	%xmm0, -80(%edx)
    432 L(aligned_16_64bytes):
    433 	movdqa	%xmm0, -64(%edx)
    434 L(aligned_16_48bytes):
    435 	movdqa	%xmm0, -48(%edx)
    436 L(aligned_16_32bytes):
    437 	movdqa	%xmm0, -32(%edx)
    438 L(aligned_16_16bytes):
    439 	movdqa	%xmm0, -16(%edx)
    440 L(aligned_16_0bytes):
    441 	SETRTNVAL
    442 	RETURN
    443 
    444 	ALIGN(4)
    445 L(aligned_16_113bytes):
    446 	movdqa	%xmm0, -113(%edx)
    447 L(aligned_16_97bytes):
    448 	movdqa	%xmm0, -97(%edx)
    449 L(aligned_16_81bytes):
    450 	movdqa	%xmm0, -81(%edx)
    451 L(aligned_16_65bytes):
    452 	movdqa	%xmm0, -65(%edx)
    453 L(aligned_16_49bytes):
    454 	movdqa	%xmm0, -49(%edx)
    455 L(aligned_16_33bytes):
    456 	movdqa	%xmm0, -33(%edx)
    457 L(aligned_16_17bytes):
    458 	movdqa	%xmm0, -17(%edx)
    459 L(aligned_16_1bytes):
    460 	movb	%al, -1(%edx)
    461 	SETRTNVAL
    462 	RETURN
    463 
    464 	ALIGN(4)
    465 L(aligned_16_114bytes):
    466 	movdqa	%xmm0, -114(%edx)
    467 L(aligned_16_98bytes):
    468 	movdqa	%xmm0, -98(%edx)
    469 L(aligned_16_82bytes):
    470 	movdqa	%xmm0, -82(%edx)
    471 L(aligned_16_66bytes):
    472 	movdqa	%xmm0, -66(%edx)
    473 L(aligned_16_50bytes):
    474 	movdqa	%xmm0, -50(%edx)
    475 L(aligned_16_34bytes):
    476 	movdqa	%xmm0, -34(%edx)
    477 L(aligned_16_18bytes):
    478 	movdqa	%xmm0, -18(%edx)
    479 L(aligned_16_2bytes):
    480 	movw	%ax, -2(%edx)
    481 	SETRTNVAL
    482 	RETURN
    483 
    484 	ALIGN(4)
    485 L(aligned_16_115bytes):
    486 	movdqa	%xmm0, -115(%edx)
    487 L(aligned_16_99bytes):
    488 	movdqa	%xmm0, -99(%edx)
    489 L(aligned_16_83bytes):
    490 	movdqa	%xmm0, -83(%edx)
    491 L(aligned_16_67bytes):
    492 	movdqa	%xmm0, -67(%edx)
    493 L(aligned_16_51bytes):
    494 	movdqa	%xmm0, -51(%edx)
    495 L(aligned_16_35bytes):
    496 	movdqa	%xmm0, -35(%edx)
    497 L(aligned_16_19bytes):
    498 	movdqa	%xmm0, -19(%edx)
    499 L(aligned_16_3bytes):
    500 	movw	%ax, -3(%edx)
    501 	movb	%al, -1(%edx)
    502 	SETRTNVAL
    503 	RETURN
    504 
    505 	ALIGN(4)
        /* Tail handlers, targets of L(table_16_128bytes), for remaining
           counts N with N mod 16 in 4..7.
           On entry %edx points one past the last byte to fill (the dispatch
           macro added N to it), %xmm0 holds the pattern in all 16 bytes and
           %eax in all 4.  Entering at L(aligned_16_Nbytes) falls through one
           aligned 16-byte store per full 16 bytes of N, then a dword store
           plus word/byte stores for the last N mod 16 bytes, then returns
           DST.  */
    506 L(aligned_16_116bytes):
    507 	movdqa	%xmm0, -116(%edx)
    508 L(aligned_16_100bytes):
    509 	movdqa	%xmm0, -100(%edx)
    510 L(aligned_16_84bytes):
    511 	movdqa	%xmm0, -84(%edx)
    512 L(aligned_16_68bytes):
    513 	movdqa	%xmm0, -68(%edx)
    514 L(aligned_16_52bytes):
    515 	movdqa	%xmm0, -52(%edx)
    516 L(aligned_16_36bytes):
    517 	movdqa	%xmm0, -36(%edx)
    518 L(aligned_16_20bytes):
    519 	movdqa	%xmm0, -20(%edx)
    520 L(aligned_16_4bytes):
    521 	movl	%eax, -4(%edx)
    522 	SETRTNVAL
    523 	RETURN
    524 
    525 	ALIGN(4)
    526 L(aligned_16_117bytes):
    527 	movdqa	%xmm0, -117(%edx)
    528 L(aligned_16_101bytes):
    529 	movdqa	%xmm0, -101(%edx)
    530 L(aligned_16_85bytes):
    531 	movdqa	%xmm0, -85(%edx)
    532 L(aligned_16_69bytes):
    533 	movdqa	%xmm0, -69(%edx)
    534 L(aligned_16_53bytes):
    535 	movdqa	%xmm0, -53(%edx)
    536 L(aligned_16_37bytes):
    537 	movdqa	%xmm0, -37(%edx)
    538 L(aligned_16_21bytes):
    539 	movdqa	%xmm0, -21(%edx)
    540 L(aligned_16_5bytes):
    541 	movl	%eax, -5(%edx)
    542 	movb	%al, -1(%edx)
    543 	SETRTNVAL
    544 	RETURN
    545 
    546 	ALIGN(4)
    547 L(aligned_16_118bytes):
    548 	movdqa	%xmm0, -118(%edx)
    549 L(aligned_16_102bytes):
    550 	movdqa	%xmm0, -102(%edx)
    551 L(aligned_16_86bytes):
    552 	movdqa	%xmm0, -86(%edx)
    553 L(aligned_16_70bytes):
    554 	movdqa	%xmm0, -70(%edx)
    555 L(aligned_16_54bytes):
    556 	movdqa	%xmm0, -54(%edx)
    557 L(aligned_16_38bytes):
    558 	movdqa	%xmm0, -38(%edx)
    559 L(aligned_16_22bytes):
    560 	movdqa	%xmm0, -22(%edx)
    561 L(aligned_16_6bytes):
    562 	movl	%eax, -6(%edx)
    563 	movw	%ax, -2(%edx)
    564 	SETRTNVAL
    565 	RETURN
    566 
    567 	ALIGN(4)
    568 L(aligned_16_119bytes):
    569 	movdqa	%xmm0, -119(%edx)
    570 L(aligned_16_103bytes):
    571 	movdqa	%xmm0, -103(%edx)
    572 L(aligned_16_87bytes):
    573 	movdqa	%xmm0, -87(%edx)
    574 L(aligned_16_71bytes):
    575 	movdqa	%xmm0, -71(%edx)
    576 L(aligned_16_55bytes):
    577 	movdqa	%xmm0, -55(%edx)
    578 L(aligned_16_39bytes):
    579 	movdqa	%xmm0, -39(%edx)
    580 L(aligned_16_23bytes):
    581 	movdqa	%xmm0, -23(%edx)
    582 L(aligned_16_7bytes):
    583 	movl	%eax, -7(%edx)
    584 	movw	%ax, -3(%edx)
    585 	movb	%al, -1(%edx)
    586 	SETRTNVAL
    587 	RETURN
    588 
    589 	ALIGN(4)
        /* Tail handlers, targets of L(table_16_128bytes), for remaining
           counts N with N mod 16 in 8..11.
           On entry %edx points one past the last byte to fill (the dispatch
           macro added N to it), %xmm0 holds the pattern in all 16 bytes and
           %eax in all 4.  Entering at L(aligned_16_Nbytes) falls through one
           aligned 16-byte store per full 16 bytes of N, then an 8-byte movq
           from %xmm0 plus word/byte stores for the last N mod 16 bytes, then
           returns DST.  */
    590 L(aligned_16_120bytes):
    591 	movdqa	%xmm0, -120(%edx)
    592 L(aligned_16_104bytes):
    593 	movdqa	%xmm0, -104(%edx)
    594 L(aligned_16_88bytes):
    595 	movdqa	%xmm0, -88(%edx)
    596 L(aligned_16_72bytes):
    597 	movdqa	%xmm0, -72(%edx)
    598 L(aligned_16_56bytes):
    599 	movdqa	%xmm0, -56(%edx)
    600 L(aligned_16_40bytes):
    601 	movdqa	%xmm0, -40(%edx)
    602 L(aligned_16_24bytes):
    603 	movdqa	%xmm0, -24(%edx)
    604 L(aligned_16_8bytes):
    605 	movq	%xmm0, -8(%edx)
    606 	SETRTNVAL
    607 	RETURN
    608 
    609 	ALIGN(4)
    610 L(aligned_16_121bytes):
    611 	movdqa	%xmm0, -121(%edx)
    612 L(aligned_16_105bytes):
    613 	movdqa	%xmm0, -105(%edx)
    614 L(aligned_16_89bytes):
    615 	movdqa	%xmm0, -89(%edx)
    616 L(aligned_16_73bytes):
    617 	movdqa	%xmm0, -73(%edx)
    618 L(aligned_16_57bytes):
    619 	movdqa	%xmm0, -57(%edx)
    620 L(aligned_16_41bytes):
    621 	movdqa	%xmm0, -41(%edx)
    622 L(aligned_16_25bytes):
    623 	movdqa	%xmm0, -25(%edx)
    624 L(aligned_16_9bytes):
    625 	movq	%xmm0, -9(%edx)
    626 	movb	%al, -1(%edx)
    627 	SETRTNVAL
    628 	RETURN
    629 
    630 	ALIGN(4)
    631 L(aligned_16_122bytes):
    632 	movdqa	%xmm0, -122(%edx)
    633 L(aligned_16_106bytes):
    634 	movdqa	%xmm0, -106(%edx)
    635 L(aligned_16_90bytes):
    636 	movdqa	%xmm0, -90(%edx)
    637 L(aligned_16_74bytes):
    638 	movdqa	%xmm0, -74(%edx)
    639 L(aligned_16_58bytes):
    640 	movdqa	%xmm0, -58(%edx)
    641 L(aligned_16_42bytes):
    642 	movdqa	%xmm0, -42(%edx)
    643 L(aligned_16_26bytes):
    644 	movdqa	%xmm0, -26(%edx)
    645 L(aligned_16_10bytes):
    646 	movq	%xmm0, -10(%edx)
    647 	movw	%ax, -2(%edx)
    648 	SETRTNVAL
    649 	RETURN
    650 
    651 	ALIGN(4)
    652 L(aligned_16_123bytes):
    653 	movdqa	%xmm0, -123(%edx)
    654 L(aligned_16_107bytes):
    655 	movdqa	%xmm0, -107(%edx)
    656 L(aligned_16_91bytes):
    657 	movdqa	%xmm0, -91(%edx)
    658 L(aligned_16_75bytes):
    659 	movdqa	%xmm0, -75(%edx)
    660 L(aligned_16_59bytes):
    661 	movdqa	%xmm0, -59(%edx)
    662 L(aligned_16_43bytes):
    663 	movdqa	%xmm0, -43(%edx)
    664 L(aligned_16_27bytes):
    665 	movdqa	%xmm0, -27(%edx)
    666 L(aligned_16_11bytes):
    667 	movq	%xmm0, -11(%edx)
    668 	movw	%ax, -3(%edx)
    669 	movb	%al, -1(%edx)
    670 	SETRTNVAL
    671 	RETURN
    672 
    673 	ALIGN(4)
        /* Tail handlers, targets of L(table_16_128bytes), for remaining
           counts N with N mod 16 in 12..15.
           On entry %edx points one past the last byte to fill (the dispatch
           macro added N to it), %xmm0 holds the pattern in all 16 bytes and
           %eax in all 4.  Entering at L(aligned_16_Nbytes) falls through one
           aligned 16-byte store per full 16 bytes of N, then an 8-byte movq
           and a dword store plus word/byte stores for the last N mod 16
           bytes, then returns DST.
           The last handler ends with RETURN_END instead of RETURN: nothing
           follows it before END, so the unwind state is not re-established
           after the ret.  */
    674 L(aligned_16_124bytes):
    675 	movdqa	%xmm0, -124(%edx)
    676 L(aligned_16_108bytes):
    677 	movdqa	%xmm0, -108(%edx)
    678 L(aligned_16_92bytes):
    679 	movdqa	%xmm0, -92(%edx)
    680 L(aligned_16_76bytes):
    681 	movdqa	%xmm0, -76(%edx)
    682 L(aligned_16_60bytes):
    683 	movdqa	%xmm0, -60(%edx)
    684 L(aligned_16_44bytes):
    685 	movdqa	%xmm0, -44(%edx)
    686 L(aligned_16_28bytes):
    687 	movdqa	%xmm0, -28(%edx)
    688 L(aligned_16_12bytes):
    689 	movq	%xmm0, -12(%edx)
    690 	movl	%eax, -4(%edx)
    691 	SETRTNVAL
    692 	RETURN
    693 
    694 	ALIGN(4)
    695 L(aligned_16_125bytes):
    696 	movdqa	%xmm0, -125(%edx)
    697 L(aligned_16_109bytes):
    698 	movdqa	%xmm0, -109(%edx)
    699 L(aligned_16_93bytes):
    700 	movdqa	%xmm0, -93(%edx)
    701 L(aligned_16_77bytes):
    702 	movdqa	%xmm0, -77(%edx)
    703 L(aligned_16_61bytes):
    704 	movdqa	%xmm0, -61(%edx)
    705 L(aligned_16_45bytes):
    706 	movdqa	%xmm0, -45(%edx)
    707 L(aligned_16_29bytes):
    708 	movdqa	%xmm0, -29(%edx)
    709 L(aligned_16_13bytes):
    710 	movq	%xmm0, -13(%edx)
    711 	movl	%eax, -5(%edx)
    712 	movb	%al, -1(%edx)
    713 	SETRTNVAL
    714 	RETURN
    715 
    716 	ALIGN(4)
    717 L(aligned_16_126bytes):
    718 	movdqa	%xmm0, -126(%edx)
    719 L(aligned_16_110bytes):
    720 	movdqa	%xmm0, -110(%edx)
    721 L(aligned_16_94bytes):
    722 	movdqa	%xmm0, -94(%edx)
    723 L(aligned_16_78bytes):
    724 	movdqa	%xmm0, -78(%edx)
    725 L(aligned_16_62bytes):
    726 	movdqa	%xmm0, -62(%edx)
    727 L(aligned_16_46bytes):
    728 	movdqa	%xmm0, -46(%edx)
    729 L(aligned_16_30bytes):
    730 	movdqa	%xmm0, -30(%edx)
    731 L(aligned_16_14bytes):
    732 	movq	%xmm0, -14(%edx)
    733 	movl	%eax, -6(%edx)
    734 	movw	%ax, -2(%edx)
    735 	SETRTNVAL
    736 	RETURN
    737 
    738 	ALIGN(4)
    739 L(aligned_16_127bytes):
    740 	movdqa	%xmm0, -127(%edx)
    741 L(aligned_16_111bytes):
    742 	movdqa	%xmm0, -111(%edx)
    743 L(aligned_16_95bytes):
    744 	movdqa	%xmm0, -95(%edx)
    745 L(aligned_16_79bytes):
    746 	movdqa	%xmm0, -79(%edx)
    747 L(aligned_16_63bytes):
    748 	movdqa	%xmm0, -63(%edx)
    749 L(aligned_16_47bytes):
    750 	movdqa	%xmm0, -47(%edx)
    751 L(aligned_16_31bytes):
    752 	movdqa	%xmm0, -31(%edx)
    753 L(aligned_16_15bytes):
    754 	movq	%xmm0, -15(%edx)
    755 	movl	%eax, -7(%edx)
    756 	movw	%ax, -3(%edx)
    757 	movb	%al, -1(%edx)
    758 	SETRTNVAL
    759 	RETURN_END
    760 
    761 END(memset_generic)
    762