/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif


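# FORTIFY entry point: %rcx carries the destination buffer size known to
# the compiler. If the requested length could overflow that buffer, divert
# to the fortify failure handler instead of writing past the end.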
ENTRY(__memset_chk)
  # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
  cmp %rcx, %rdx
  ja __memset_chk_fail
  // Fall through to memset...
END(__memset_chk)


	.section .text.sse2,"ax",@progbits
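/*
 * memset(dst=%rdi, byte=%esi, n=%rdx), SSE2 implementation.
 * The fill byte is replicated across a GPR (and, for the vector paths,
 * across %xmm0). Sizes below 16 use scalar stores; 16..128 bytes use
 * pairs of overlapping unaligned 16-byte stores anchored at both ends
 * of the buffer; anything larger runs a 64-byte-aligned store loop,
 * switching to non-temporal stores once n exceeds the shared cache size.
 */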
ENTRY(memset)
	movq	%rdi, %rax		# return value is always dst
	and	$0xff, %rsi		# keep only the fill byte
	mov	$0x0101010101010101, %rcx
	imul	%rsi, %rcx		# replicate the byte into all 8 bytes of %rcx
	cmpq	$16, %rdx
	jae	L(16bytesormore)
	# n < 16: dispatch on the highest set bit of n; each target covers
	# its whole size range with a pair of overlapping stores.
	testb	$8, %dl
	jnz	L(8_15bytes)
	testb	$4, %dl
	jnz	L(4_7bytes)
	testb	$2, %dl
	jnz	L(2_3bytes)
	testb	$1, %dl
	jz	L(return)
	movb	%cl, (%rdi)
L(return):
	ret

L(8_15bytes):
	movq	%rcx, (%rdi)		# first 8 bytes
	movq	%rcx, -8(%rdi, %rdx)	# last 8 bytes (overlaps when n < 16)
	ret

L(4_7bytes):
	movl	%ecx, (%rdi)
	movl	%ecx, -4(%rdi, %rdx)
	ret

L(2_3bytes):
	movw	%cx, (%rdi)
	movw	%cx, -2(%rdi, %rdx)
	ret

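# 16 <= n <= 128: fill from both ends toward the middle with unaligned
# 16-byte stores; the head and tail stores overlap whenever n is not a
# multiple of 16, so no scalar tail handling is needed.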
	ALIGN (4)
L(16bytesormore):
	movd	%rcx, %xmm0		# low 8 bytes of %xmm0 now hold the pattern
	pshufd	$0, %xmm0, %xmm0	# broadcast the low dword to all 16 bytes
	movdqu	%xmm0, (%rdi)
	movdqu	%xmm0, -16(%rdi, %rdx)
	cmpq	$32, %rdx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%rdi)
	movdqu	%xmm0, -32(%rdi, %rdx)
	cmpq	$64, %rdx
	jbe	L(64bytesless)
	movdqu	%xmm0, 32(%rdi)
	movdqu	%xmm0, 48(%rdi)
	movdqu	%xmm0, -64(%rdi, %rdx)
	movdqu	%xmm0, -48(%rdi, %rdx)
	cmpq	$128, %rdx
	ja	L(128bytesmore)
L(32bytesless):
L(64bytesless):
	ret

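# n > 128: the first and last 64 bytes were already written above, so
# only the 64-byte-aligned middle remains. Use cached stores while the
# working set fits in the shared cache, and non-temporal stores beyond
# that to avoid evicting useful cache lines.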
	ALIGN (4)
L(128bytesmore):
	leaq	64(%rdi), %rcx
	andq	$-64, %rcx		# %rcx = next 64-byte boundary above dst
	movq	%rdx, %r8		# keep n for the cache-size comparison
	addq	%rdi, %rdx
	andq	$-64, %rdx		# %rdx = dst+n rounded down to a 64-byte boundary
	cmpq	%rcx, %rdx
	je	L(return)		# no full blocks in between; head/tail stores covered it

#ifdef SHARED_CACHE_SIZE
	cmp	$SHARED_CACHE_SIZE, %r8
#else
	cmp	__x86_64_shared_cache_size(%rip), %r8
#endif
	ja	L(128bytesmore_nt)

	ALIGN (4)
L(128bytesmore_normal):
	movdqa	%xmm0, (%rcx)
	movaps	%xmm0, 0x10(%rcx)	# same 16-byte aligned store; movaps encodes one byte shorter
	movaps	%xmm0, 0x20(%rcx)
	movaps	%xmm0, 0x30(%rcx)
	addq	$64, %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_normal)
	ret

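# Non-temporal variant: movntdq stores bypass the cache hierarchy and are
# weakly ordered, so an sfence is required before returning to order them
# ahead of any subsequent stores.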
	ALIGN (4)
L(128bytesmore_nt):
	movntdq	%xmm0, (%rcx)
	movntdq	%xmm0, 0x10(%rcx)
	movntdq	%xmm0, 0x20(%rcx)
	movntdq	%xmm0, 0x30(%rcx)
	leaq	64(%rcx), %rcx
	cmpq	%rcx, %rdx
	jne	L(128bytesmore_nt)
	sfence				# make the non-temporal stores ordered before return
	ret

END(memset)