/*
 *  arch/xtensa/lib/memset.S
 *
 *  ANSI C standard library function memset
 *  (Well, almost.  .fixup code might return zero.)
 *
 *  This file is subject to the terms and conditions of the GNU General
 *  Public License.  See the file "COPYING" in the main directory of
 *  this archive for more details.
 *
 *  Copyright (C) 2002 Tensilica Inc.
 */

#include <variant/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */
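
/*
 * For reference only, a rough C sketch of the same strategy.  This is
 * an illustrative, hypothetical equivalent (the name memset_sketch is
 * made up); the .fixup path below, which can make the real routine
 * return zero on a faulting store, is not modelled here.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	void *memset_sketch(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		uint32_t w = (unsigned char)c;	// c in the low byte
 *
 *		w |= w << 8;			// replicate c into every byte
 *		w |= w << 16;
 *
 *		if ((uintptr_t)p & 3) {
 *			if (n < 8) {		// short unaligned: byte loop
 *				while (n--)
 *					*p++ = (unsigned char)c;
 *				return dst;
 *			}
 *			if ((uintptr_t)p & 1) {	// 1B to reach 2B alignment
 *				*p++ = (unsigned char)c;
 *				n--;
 *			}
 *			if ((uintptr_t)p & 2) {	// 2B to reach 4B alignment
 *				*(uint16_t *)p = (uint16_t)w;
 *				p += 2;
 *				n -= 2;
 *			}
 *		}
 *		for (size_t i = n >> 4; i; i--) {	// 16B chunks
 *			((uint32_t *)p)[0] = w;
 *			((uint32_t *)p)[1] = w;
 *			((uint32_t *)p)[2] = w;
 *			((uint32_t *)p)[3] = w;
 *			p += 16;
 *		}
 *		if (n & 8) { ((uint32_t *)p)[0] = w; ((uint32_t *)p)[1] = w; p += 8; }
 *		if (n & 4) { *(uint32_t *)p = w; p += 4; }
 *		if (n & 2) { *(uint16_t *)p = (uint16_t)w; p += 2; }
 *		if (n & 1) { *p = (unsigned char)c; }
 *		return dst;
 *	}
 */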

/* Load or store instructions that may cause exceptions use the EX macro. */

#define EX(insn,reg1,reg2,offset,handler)	\
9:	insn	reg1, reg2, offset;		\
	.section __ex_table, "a";		\
	.word	9b, handler;			\
	.previous

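/*
 * For example,
 *	EX(s32i, a3, a5, 0, memset_fixup)
 * expands to a labelled store plus an exception-table entry:
 *	9:	s32i	a3, a5, 0;
 *		.section __ex_table, "a";
 *		.word	9b, memset_fixup;
 *		.previous
 * pairing the address of the possibly-faulting store with the fixup
 * handler to jump to if it faults.
 */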

.text
.align	4
.global	memset
.type	memset,@function
memset:
	entry	sp, 16		# minimal stack frame
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
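	# e.g. c = 0x5a: a3 becomes 0x5a -> 0x5a5a -> 0x5a5a5a5a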
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	retw

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	EX(s32i, a3, a5,  0, memset_fixup)
	EX(s32i, a3, a5,  4, memset_fixup)
	EX(s32i, a3, a5,  8, memset_fixup)
	EX(s32i, a3, a5, 12, memset_fixup)
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
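	# the low four bits of the remaining length select the tail stores
	# below, e.g. length 23 (0b10111): one 16B iteration above, then
	# the 4B, 2B and 1B stores (bit 3 is clear, so the 8B store is skipped)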
	bbci.l	a4, 3, .L2
	# set 8 bytes
	EX(s32i, a3, a5,  0, memset_fixup)
	EX(s32i, a3, a5,  4, memset_fixup)
	addi	a5, a5,  8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
	EX(s32i, a3, a5,  0, memset_fixup)
	addi	a5, a5,  4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
	EX(s16i, a3, a5,  0, memset_fixup)
	addi	a5, a5,  2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
	EX(s8i, a3, a5,  0, memset_fixup)
.L5:
.Lret1:
	retw

/*
 * Destination is unaligned
 */

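/*
 * Depending on dst & 3, at most one byte store and one half-word store
 * are done here before rejoining the aligned code at .L0:
 *   dst & 3 == 1:  1B store, then fall through to the 2B store
 *   dst & 3 == 2:  2B store only
 *   dst & 3 == 3:  1B store only
 * Lengths below 8 are simply set byte by byte instead.
 */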
.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# do short copies byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is half-word aligned
	# dst is only byte aligned
	# set 1 byte
	EX(s8i, a3, a5,  0, memset_fixup)
	addi	a5, a5,  1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
	EX(s16i, a3, a5,  0, memset_fixup)
	addi	a5, a5,  2
	addi	a4, a4, -2
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
	EX(s8i, a3, a5, 0, memset_fixup)
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	retw


	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

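/*
 * If one of the EX() stores above takes an exception, the kernel looks
 * up the faulting address in __ex_table and resumes execution here, so
 * memset returns NULL instead of the destination pointer.
 */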
memset_fixup:
	movi	a2, 0
	retw