/* Copyright (c) 2012-2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

#include <private/bionic_asm.h>

/* By default we assume that the DC instruction can be used to zero
   data blocks more efficiently.  In some circumstances this might be
   unsafe, for example in an asymmetric multiprocessor environment with
   different DC clear lengths (neither the upper nor lower lengths are
   safe to use).

   If code may be run in a virtualized environment, then define
   MAYBE_VIRT.  This will cause the code to cache the system register
   values rather than re-reading them each call.  */
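
/* DC ZVA availability and block size are advertised in DCZID_EL0:
   bit 4 (DZP) set means DC ZVA is prohibited, and bits [3:0] (BS) hold
   log2 of the block size in words, so the block size is 4 << BS bytes
   (BS == 4 -> 64 bytes, BS == 5 -> 128 bytes).  L(try_zva) below decodes
   the register exactly this way.  */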

#define dstin		x0
#define val		x1
#define valw		w1
#define count		x2
#define dst 		x3
#define dstend		x4
#define tmp1		x5
#define tmp1w		w5
#define tmp2		x6
#define tmp2w		w6
#define zva_len		x7
#define zva_lenw	w7

#define L(l) .L ## l

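/* __memset_chk(dst, byte, count, dst_len) is the _FORTIFY_SOURCE checked
   entry point: x3 carries the compiler-computed size of the destination
   object (read here through the "dst" alias).  When count <= dst_len the
   write is in bounds and we tail-call memset; otherwise we abort via
   __memset_chk_fail.  */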
ENTRY(__memset_chk)
  cmp count, dst
  bls memset

  // Preserve for accurate backtrace.
  stp x29, x30, [sp, -16]!
  .cfi_def_cfa_offset 16
  .cfi_rel_offset x29, 0
  .cfi_rel_offset x30, 8

  bl __memset_chk_fail
END(__memset_chk)

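/* memset(dst, val, count): broadcast the fill byte into v0 and pick a
   path by size.  0..15 bytes use overlapping byte/halfword/word/dword
   stores, 16..96 bytes use overlapping 16-byte stores, and larger sizes
   use a 64-byte stp loop.  Zero fills of at least 256 bytes try DC ZVA
   when the CPU allows it.  */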
ENTRY(memset)

	dup	v0.16B, valw
	add	dstend, dstin, count

	cmp	count, 96
	b.hi	L(set_long)
	cmp	count, 16
	b.hs	L(set_medium)
	mov	val, v0.D[0]

	/* Set 0..15 bytes.  */
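	/* Stores are issued from both ends and may overlap; e.g. for
	   count == 13 the two 8-byte stores at [dstin] and [dstend - 8]
	   overlap by 3 bytes and cover all 13.  */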
	tbz	count, 3, 1f
	str	val, [dstin]
	str	val, [dstend, -8]
	ret
	nop
1:	tbz	count, 2, 2f
	str	valw, [dstin]
	str	valw, [dstend, -4]
	ret
2:	cbz	count, 3f
	strb	valw, [dstin]
	tbz	count, 1, 3f
	strh	valw, [dstend, -2]
3:	ret

	/* Set 16..96 bytes.  */
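	/* A store from the start and a store from the end always meet or
	   overlap, so no tail handling is needed.  */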
L(set_medium):
	str	q0, [dstin]
	tbnz	count, 6, L(set96)
	str	q0, [dstend, -16]
	tbz	count, 5, 1f
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
1:	ret

	.p2align 4
	/* Set 64..96 bytes.  Write 64 bytes from the start and
	   32 bytes from the end.  */
L(set96):
	str	q0, [dstin, 16]
	stp	q0, q0, [dstin, 32]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
	nop
L(set_long):
	and	valw, valw, 255
	bic	dst, dstin, 15
	str	q0, [dstin]
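	/* Consider DC ZVA only for zero fills of at least 256 bytes: the
	   ccmp compares valw with 0 only when count >= 256 (CS set);
	   otherwise it forces Z clear so we fall through to L(no_zva).  */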
	cmp	count, 256
	ccmp	valw, 0, 0, cs
	b.eq	L(try_zva)
L(no_zva):
	sub	count, dstend, dst	/* Count is 16 too large.  */
	add	dst, dst, 16
	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
1:	stp	q0, q0, [dst], 64
	stp	q0, q0, [dst, -32]
L(tail64):
	subs	count, count, 64
	b.hi	1b
2:	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
L(try_zva):
	mrs	tmp1, dczid_el0
	tbnz	tmp1w, 4, L(no_zva)	/* DZP set: DC ZVA prohibited.  */
	and	tmp1w, tmp1w, 15	/* BS: log2 of block size in words.  */
	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
	b.ne	L(zva_128)

	/* Write the first and last 64 byte aligned block using stp rather
	   than using DC ZVA.  This is faster on some cores.
	 */
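	/* Together with the 16 bytes already stored at dstin, the stores
	   below cover everything up to dst + 128 once dst has been aligned
	   down to 64, so the DC ZVA loop can start there.  */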
L(zva_64):
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	bic	dst, dst, 63
	stp	q0, q0, [dst, 64]
	stp	q0, q0, [dst, 96]
	sub	count, dstend, dst	/* Count is now 128 too large.	*/
	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
	add	dst, dst, 128
	nop
1:	dc	zva, dst
	add	dst, dst, 64
	subs	count, count, 64
	b.hi	1b
	stp	q0, q0, [dst, 0]
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 3
L(zva_128):
	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
	b.ne	L(zva_other)

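	/* As in L(zva_64): store the first 128 bytes with stp, align dst
	   down to 128, let DC ZVA zero the body, and finish with four
	   32-byte stp stores covering the last 128 bytes.  */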
	str	q0, [dst, 16]
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]
	stp	q0, q0, [dst, 96]
	bic	dst, dst, 127
	sub	count, dstend, dst	/* Count is now 128 too large.	*/
	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
	add	dst, dst, 128
1:	dc	zva, dst
	add	dst, dst, 128
	subs	count, count, 128
	b.hi	1b
	stp	q0, q0, [dstend, -128]
	stp	q0, q0, [dstend, -96]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

L(zva_other):
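	/* Generic block size: zva_len = 4 << BS bytes.  Fall back to plain
	   stores when count is too small to reach a fully aligned block;
	   otherwise align dst up with stp, DC ZVA the aligned middle, and
	   let L(tail64) write whatever remains.  */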
	mov	tmp2w, 4
	lsl	zva_lenw, tmp2w, tmp1w
	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
	cmp	count, tmp1
	blo	L(no_zva)

	sub	tmp2, zva_len, 1
	add	tmp1, dst, zva_len
	add	dst, dst, 16
	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
	beq	2f
1:	stp	q0, q0, [dst], 64
	stp	q0, q0, [dst, -32]
	subs	count, count, 64
	b.hi	1b
2:	mov	dst, tmp1
	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
	subs	count, count, zva_len
	b.lo	4f
3:	dc	zva, dst
	add	dst, dst, zva_len
	subs	count, count, zva_len
	b.hs	3b
4:	add	count, count, zva_len
	b	L(tail64)

END(memset)