Home | History | Annotate | Download | only in string
      1 /*
      2 Copyright (c) 2014, Intel Corporation
      3 All rights reserved.
      4 
      5 Redistribution and use in source and binary forms, with or without
      6 modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice,
      9     * this list of conditions and the following disclaimer.
     10 
     11     * Redistributions in binary form must reproduce the above copyright notice,
     12     * this list of conditions and the following disclaimer in the documentation
     13     * and/or other materials provided with the distribution.
     14 
     15     * Neither the name of Intel Corporation nor the names of its contributors
     16     * may be used to endorse or promote products derived from this software
     17     * without specific prior written permission.
     18 
     19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
     20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     22 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
     23 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     24 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     26 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     28 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 */
     30 
     31 #include "cache.h"
     32 
     33 #ifndef MEMSET	/* Symbol name of the routine below; defaults to memset unless MEMSET was defined before this point. */
     34 # define MEMSET		memset
     35 #endif
     36 
     37 #ifndef L	/* L(x) pastes x onto ".L", giving the label .Lx (".L" is the assembler-local label prefix on ELF). */
     38 # define L(label)	.L##label
     39 #endif
     40 
     41 #ifndef ALIGN	/* ALIGN(n) emits .p2align n, i.e. pads to a 2^n-byte boundary. */
     42 # define ALIGN(n)	.p2align n
     43 #endif
     44 
     45 #ifndef cfi_startproc	/* Opens a call-frame-information region; maps to the .cfi_startproc directive unless already defined. */
     46 # define cfi_startproc			.cfi_startproc
     47 #endif
     48 
     49 #ifndef cfi_endproc	/* Closes the call-frame-information region; maps to the .cfi_endproc directive unless already defined. */
     50 # define cfi_endproc			.cfi_endproc
     51 #endif
     52 
     53 #ifndef ENTRY	/* ENTRY(name): mark name as a global function symbol, place its label here and open a CFI region. No alignment directive is emitted. */
     54 # define ENTRY(name)			\
     55 	.type name,  @function;	\
     56 	.globl name;			\
     57 name:					\
     58 	cfi_startproc
     59 #endif
     60 
     61 #ifndef END	/* END(name): close the CFI region and set the symbol size to the distance from name to this point. */
     62 # define END(name)			\
     63 	cfi_endproc;			\
     64 	.size name, .-name
     65 #endif
     66 /*
         * MEMSET -- fill a buffer with one byte value using SSE2 stores.
         *
         * Register usage follows the System V AMD64 argument order:
         *   default build:          %rdi = dst, %rsi = fill value, %rdx = byte count n
         *   USE_AS_BZERO_P defined: %rdi = dst, %rsi = byte count n, fill value is 0
         * Result: %rax = dst on every path (%rax is written once at entry and never
         * touched again).
         *
         * Register roles after the prologue:
         *   %rcx   fill byte replicated into all 8 bytes (0 in the bzero build)
         *   %rdx   byte count n
         *   %xmm0  fill byte replicated into all 16 bytes (only set up when n >= 16)
         *
         * Strategy: for n <= 128 the buffer is written with pairs of stores anchored
         * at the start and at the end of the buffer; the two groups may overlap, which
         * is harmless because every store writes the same pattern.  For n > 128 the
         * first and last 64 bytes are written that way and the 64-byte-aligned middle
         * is filled by a loop (regular or non-temporal stores depending on n).
         *
         * May modify: %rcx, %rdx, %rsi (default build), %r8 (n > 128), %xmm0, flags.
         */
     67 	.section .text.sse2,"ax",@progbits
     68 ENTRY (MEMSET)
     69 	movq	%rdi, %rax		/* return value = original dst */
     70 #ifdef USE_AS_BZERO_P
     71 	mov	%rsi, %rdx		/* bzero build: length arrives in %rsi; shared code expects it in %rdx */
     72 	xor	%rcx, %rcx		/* fill pattern = 0 */
     73 #else
     74 	and	$0xff, %rsi		/* keep only the low byte of the fill value */
     75 	mov	$0x0101010101010101, %rcx
     76 	imul	%rsi, %rcx		/* byte * 0x0101...01 = that byte replicated into all 8 bytes of %rcx */
     77 #endif
     78 	cmpq	$16, %rdx
     79 	jae	L(16bytesormore)	/* unsigned compare: n >= 16 takes the SSE path */
     80 	testb	$8, %dl			/* n < 16 here, so bits 3..0 of %dl select the size class; bit 3 set: n is 8..15 */
     81 	jnz	L(8_15bytes)
     82 	testb	$4, %dl			/* bit 2 set (bit 3 clear): n is 4..7 */
     83 	jnz	L(4_7bytes)
     84 	testb	$2, %dl			/* bit 1 set (bits 3,2 clear): n is 2..3 */
     85 	jnz	L(2_3bytes)
     86 	testb	$1, %dl
     87 	jz	L(return)		/* n == 0: nothing to store */
     88 	movb	%cl, (%rdi)		/* n == 1 */
     89 L(return):
     90 	ret
     91 
     92 L(8_15bytes):
     93 	movq	%rcx, (%rdi)		/* first 8 bytes */
     94 	movq	%rcx, -8(%rdi, %rdx)	/* last 8 bytes, dst+n-8 (overlaps the first store when n < 16) */
     95 	ret
     96 
     97 L(4_7bytes):
     98 	movl	%ecx, (%rdi)		/* first 4 bytes */
     99 	movl	%ecx, -4(%rdi, %rdx)	/* last 4 bytes, dst+n-4 */
    100 	ret
    101 
    102 L(2_3bytes):
    103 	movw	%cx, (%rdi)		/* first 2 bytes */
    104 	movw	%cx, -2(%rdi, %rdx)	/* last 2 bytes, dst+n-2 */
    105 	ret
    106 
    107 	ALIGN (4)
    108 L(16bytesormore):
    109 #ifdef USE_AS_BZERO_P
    110 	pxor	%xmm0, %xmm0		/* 16 zero bytes */
    111 #else
    112 	movd	%rcx, %xmm0		/* 8-byte pattern into the low 64 bits of %xmm0 */
    113 	pshufd	$0, %xmm0, %xmm0	/* broadcast dword 0 to all four dwords: 16 copies of the fill byte */
    114 #endif
    115 	movdqu	%xmm0, (%rdi)		/* bytes [0, 16) */
    116 	movdqu	%xmm0, -16(%rdi, %rdx)	/* bytes [n-16, n) */
    117 	cmpq	$32, %rdx
    118 	jbe	L(32bytesless)		/* n <= 32: the two unaligned stores above cover everything */
    119 	movdqu	%xmm0, 16(%rdi)		/* bytes [16, 32) */
    120 	movdqu	%xmm0, -32(%rdi, %rdx)	/* bytes [n-32, n-16) */
    121 	cmpq	$64, %rdx
    122 	jbe	L(64bytesless)		/* n <= 64: first 32 + last 32 bytes cover everything */
    123 	movdqu	%xmm0, 32(%rdi)		/* bytes [32, 48) */
    124 	movdqu	%xmm0, 48(%rdi)		/* bytes [48, 64) */
    125 	movdqu	%xmm0, -64(%rdi, %rdx)	/* bytes [n-64, n-48) */
    126 	movdqu	%xmm0, -48(%rdi, %rdx)	/* bytes [n-48, n-32) */
    127 	cmpq	$128, %rdx
    128 	ja	L(128bytesmore)		/* n > 128: the middle still has to be filled; otherwise first 64 + last 64 suffice */
    129 L(32bytesless):
    130 L(64bytesless):
    131 	ret				/* shared exit for the 16..128 byte cases */
    132 
    133 	ALIGN (4)
    134 L(128bytesmore):			/* first and last 64 bytes are already written at this point */
    135 	leaq	64(%rdi), %rcx
    136 	andq	$-64, %rcx		/* %rcx = first 64-byte-aligned address strictly above dst (at most dst+64) */
    137 	movq	%rdx, %r8		/* keep n for the size-threshold comparison below */
    138 	addq	%rdi, %rdx		/* %rdx = dst + n (one past the end) */
    139 	andq	$-64, %rdx		/* %rdx = end rounded down to a 64-byte boundary (less than 64 bytes below the end) */
    140 	cmpq	%rcx, %rdx
    141 	je	L(return)		/* empty aligned range: done. NOTE(review): with n > 128 the range looks always non-empty, so this appears to be a defensive check */
    142 
    143 #ifdef SHARED_CACHE_SIZE		/* compile-time threshold if the macro is defined (presumably by cache.h -- confirm) */
    144 	cmp	$SHARED_CACHE_SIZE, %r8
    145 #else					/* otherwise read the threshold from a variable defined outside this file */
    146 	cmp	__x86_64_shared_cache_size(%rip), %r8
    147 #endif
    148 	ja	L(128bytesmore_nt)	/* n above the threshold: use non-temporal stores */
    149 
    150 	ALIGN (4)
    151 L(128bytesmore_normal):			/* fill [%rcx, %rdx) 64 bytes per iteration */
    152 	movdqa	%xmm0, (%rcx)		/* aligned stores are valid: %rcx is a multiple of 64 */
    153 	movaps	%xmm0, 0x10(%rcx)
    154 	movaps	%xmm0, 0x20(%rcx)
    155 	movaps	%xmm0, 0x30(%rcx)
    156 	addq	$64, %rcx
    157 	cmpq	%rcx, %rdx
    158 	jne	L(128bytesmore_normal)	/* both bounds are multiples of 64, so equality is reached exactly */
    159 	ret
    160 
    161 	ALIGN (4)
    162 L(128bytesmore_nt):			/* same loop as above, using non-temporal (movntdq) stores */
    163 	movntdq	%xmm0, (%rcx)
    164 	movntdq	%xmm0, 0x10(%rcx)
    165 	movntdq	%xmm0, 0x20(%rcx)
    166 	movntdq	%xmm0, 0x30(%rcx)
    167 	leaq	64(%rcx), %rcx		/* advance to the next 64-byte chunk */
    168 	cmpq	%rcx, %rdx
    169 	jne	L(128bytesmore_nt)
    170 	sfence				/* order the weakly-ordered non-temporal stores before returning to the caller */
    171 	ret
    172 
    173 END (MEMSET)
    174