Home | History | Annotate | Download | only in runtime
      1 // Inferno's libkern/memmove-arm.s
      2 // https://bitbucket.org/inferno-os/inferno-os/src/default/libkern/memmove-arm.s
      3 //
      4 //         Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
      5 //         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
      6 //         Portions Copyright 2009 The Go Authors. All rights reserved.
      7 //
      8 // Permission is hereby granted, free of charge, to any person obtaining a copy
      9 // of this software and associated documentation files (the "Software"), to deal
     10 // in the Software without restriction, including without limitation the rights
     11 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     12 // copies of the Software, and to permit persons to whom the Software is
     13 // furnished to do so, subject to the following conditions:
     14 //
     15 // The above copyright notice and this permission notice shall be included in
     16 // all copies or substantial portions of the Software.
     17 //
     18 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     19 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
     21 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     22 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     23 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
     24 // THE SOFTWARE.
     25 
     26 #include "textflag.h"
     27 
     28 // TE or TS are spilled to the stack during bulk register moves.
        // TS (R0) is the destination pointer loaded from the to argument and TE (R8)
        // is the destination end pointer (to plus n). R0 belongs to the R0-R7 set and
        // is BR0 in the backward bulk loops; R8 belongs to the R1-R8 set and is FR3 in
        // the forward bulk loops. In each direction the pointer that only serves as
        // the stopping bound is the one saved in the frame slot around those loops.
     29 #define TS	R0
     30 #define TE	R8
     31 
     32 // Warning: the linker will use R11 to synthesize certain instructions. Please
     33 // take care and double check with objdump.
        // FROM is the source pointer. N holds the byte count on entry; TMP names the
        // same register and is used as scratch once the count is no longer read.
        // TMP1 is the scratch register of the aligned word-at-a-time loops.
     34 #define FROM	R11
     35 #define N	R12
     36 #define TMP	R12				/* N and TMP don't overlap */
     37 #define TMP1	R5
     38 
        // Used only on the unaligned-source paths: the two shift amounts that merge
        // neighbouring source words, and the byte count by which FROM is corrected
        // when leaving the bulk loop. RSHIFT names the same register as TMP1.
     39 #define RSHIFT	R5
     40 #define LSHIFT	R6
     41 #define OFFSET	R7
     42 
        // Backward unaligned loop: BR0-BR3 are the words read per iteration and
        // BW0-BW3 the merged words written. BWk names the same register as BR(k+1)
        // for k = 0..2, so each merged word replaces a source word in place.
     43 #define BR0	R0					/* shared with TS */
     44 #define BW0	R1
     45 #define BR1	R1
     46 #define BW1	R2
     47 #define BR2	R2
     48 #define BW2	R3
     49 #define BR3	R3
     50 #define BW3	R4
     51 
        // Forward unaligned loop: FR0-FR3 are the words read per iteration and
        // FW0-FW3 the merged words written. FW(k+1) names the same register as FRk
        // for k = 0..2.
     52 #define FW0	R1
     53 #define FR0	R2
     54 #define FW1	R2
     55 #define FR1	R3
     56 #define FW2	R3
     57 #define FR2	R4
     58 #define FW3	R4
     59 #define FR3	R8					/* shared with TE */
     60 
     61 TEXT runtimememmove(SB), NOSPLIT, $4-12
        // Copies n bytes from the address in from to the address in to, picking the
        // copy direction from the relative order of the two pointers.
        //
        // Arguments (read from the caller frame):
        //   to+0(FP)    destination start
        //   from+4(FP)  source start
        //   n+8(FP)     byte count
        // On exit R0 is reloaded with the to argument.
        //
        // The 4-byte local frame is a single slot at -4(SP), referred to as savedts
        // or savedte, that holds TS or TE while a block move overwrites its register.
        //
        // Direction: when to is lower than or equal to from (unsigned compare) the
        // copy runs forward from the start; otherwise it runs backward from the end.
        // This is the usual memmove arrangement for overlapping regions.
        //
        // Each direction has the same stages: byte copies until the destination is
        // 4-byte aligned, then either 32-byte block moves plus word moves (source
        // also aligned) or 16-byte shift-and-merge moves (source misaligned), and
        // finally a byte-at-a-time tail.
        //
        // NOTE(review): Go assembly normally spells a package symbol with a middle
        // dot between package and name. This listing shows runtimememmove with no
        // separator, which looks like an extraction artifact. Confirm against the
        // upstream file before assembling.
     62 _memmove:
     63 	MOVW	to+0(FP), TS
     64 	MOVW	from+4(FP), FROM
     65 	MOVW	n+8(FP), N
     66 
     67 	ADD	N, TS, TE	/* to end pointer */
     68 
        // Forward when TS is lower than or equal to FROM, else fall into _back.
     69 	CMP	FROM, TS
     70 	BLS	_forward
     71 
        // Backward copy. FROM and TE start one past the last byte and are decremented
        // before each access; TS stays at the destination start and is the stop value.
     72 _back:
     73 	ADD	N, FROM		/* from end pointer */
     74 	CMP	$4, N		/* need at least 4 bytes to copy */
     75 	BLT	_b1tail
     76 
        // From here on R12 is used as TMP; the count in N is not read again on this
        // path. One byte is copied per pass until the low two bits of TE are zero.
     77 _b4align:				/* align destination on 4 */
     78 	AND.S	$3, TE, TMP
     79 	BEQ	_b4aligned
     80 
     81 	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
     82 	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
     83 	B	_b4align
     84 
        // TMP = FROM & 3. A nonzero value (1, 2 or 3) is carried into _bunaligned.
     85 _b4aligned:				/* is source now aligned? */
     86 	AND.S	$3, FROM, TMP
     87 	BNE	_bunaligned
     88 
        // TMP = TS + 31 is the loop bound: the block loop runs while TE is above it,
        // that is while at least 32 bytes remain. TS is saved because the block load
        // below writes R0.
     89 	ADD	$31, TS, TMP	/* do 32-byte chunks if possible */
     90 	MOVW	TS, savedts-4(SP)
     91 _b32loop:
     92 	CMP	TMP, TE
     93 	BLS	_b4tail
     94 
        // Load eight words ending just below FROM into R0-R7, then store them ending
        // just below TE. Both pointers are written back, moving down by 32 bytes.
     95 	MOVM.DB.W (FROM), [R0-R7]
     96 	MOVM.DB.W [R0-R7], (TE)
     97 	B	_b32loop
     98 
        // Restore TS, then copy one word per pass while TE is above TS + 3.
     99 _b4tail:				/* do remaining words if possible */
    100 	MOVW	savedts-4(SP), TS
    101 	ADD	$3, TS, TMP
    102 _b4loop:
    103 	CMP	TMP, TE
    104 	BLS	_b1tail
    105 
    106 	MOVW.W	-4(FROM), TMP1	/* pre-indexed */
    107 	MOVW.W	TMP1, -4(TE)	/* pre-indexed */
    108 	B	_b4loop
    109 
        // Copy one byte per pass until TE has come down to TS. Also the entry point
        // for counts below 4 and the exit of the backward unaligned path.
    110 _b1tail:				/* remaining bytes */
    111 	CMP	TE, TS
    112 	BEQ	_return
    113 
    114 	MOVBU.W	-1(FROM), TMP	/* pre-indexed */
    115 	MOVBU.W	TMP, -1(TE)	/* pre-indexed */
    116 	B	_b1tail
    117 
        // Forward copy. FROM and TS advance after each access; TE is the stop value.
    118 _forward:
    119 	CMP	$4, N		/* need at least 4 bytes to copy */
    120 	BLT	_f1tail
    121 
        // R12 becomes TMP from here on. One byte is copied per pass until the low two
        // bits of TS are zero.
    122 _f4align:				/* align destination on 4 */
    123 	AND.S	$3, TS, TMP
    124 	BEQ	_f4aligned
    125 
    126 	MOVBU.P	1(FROM), TMP	/* implicit write back */
    127 	MOVBU.P	TMP, 1(TS)	/* implicit write back */
    128 	B	_f4align
    129 
        // TMP = FROM & 3. A nonzero value (1, 2 or 3) is carried into _funaligned.
    130 _f4aligned:				/* is source now aligned? */
    131 	AND.S	$3, FROM, TMP
    132 	BNE	_funaligned
    133 
        // TMP = TE - 31 is the loop bound: the block loop runs while TS is below it,
        // that is while at least 32 bytes remain. TE is saved because the block load
        // below writes R8.
    134 	SUB	$31, TE, TMP	/* do 32-byte chunks if possible */
    135 	MOVW	TE, savedte-4(SP)
    136 _f32loop:
    137 	CMP	TMP, TS
    138 	BHS	_f4tail
    139 
        // Load eight words starting at FROM into R1-R8, then store them starting at
        // TS. Both pointers are written back, moving up by 32 bytes.
    140 	MOVM.IA.W (FROM), [R1-R8]
    141 	MOVM.IA.W [R1-R8], (TS)
    142 	B	_f32loop
    143 
        // Restore TE, then copy one word per pass while TS is below TE - 3.
    144 _f4tail:
    145 	MOVW	savedte-4(SP), TE
    146 	SUB	$3, TE, TMP	/* do remaining words if possible */
    147 _f4loop:
    148 	CMP	TMP, TS
    149 	BHS	_f1tail
    150 
    151 	MOVW.P	4(FROM), TMP1	/* implicit write back */
    152 	MOVW.P	TMP1, 4(TS)	/* implicit write back */
    153 	B	_f4loop
    154 
        // Copy one byte per pass until TS reaches TE. Also the entry point for counts
        // below 4 and the exit of the forward unaligned path.
    155 _f1tail:
    156 	CMP	TS, TE
    157 	BEQ	_return
    158 
    159 	MOVBU.P	1(FROM), TMP	/* implicit write back */
    160 	MOVBU.P	TMP, 1(TS)	/* implicit write back */
    161 	B	_f1tail
    162 
        // Common exit for both directions: R0 is reloaded from the to argument,
        // since R0 (TS) was modified during the copy.
    163 _return:
    164 	MOVW	to+0(FP), R0
    165 	RET
    166 
        // Backward copy with a word-aligned destination and a misaligned source.
        // TMP holds FROM & 3 (1, 2 or 3). The three conditional groups below select,
        // for that value, the shift pair used to merge neighbouring source words and
        // the OFFSET later added back to FROM: 1, 2 or 3 bytes respectively.
    167 _bunaligned:
    168 	CMP	$2, TMP		/* is TMP < 2 ? */
    169 
    170 	MOVW.LT	$8, RSHIFT		/* (R(n)<<24)|(R(n-1)>>8) */
    171 	MOVW.LT	$24, LSHIFT
    172 	MOVW.LT	$1, OFFSET
    173 
    174 	MOVW.EQ	$16, RSHIFT		/* (R(n)<<16)|(R(n-1)>>16) */
    175 	MOVW.EQ	$16, LSHIFT
    176 	MOVW.EQ	$2, OFFSET
    177 
    178 	MOVW.GT	$24, RSHIFT		/* (R(n)<<8)|(R(n-1)>>24) */
    179 	MOVW.GT	$8, LSHIFT
    180 	MOVW.GT	$3, OFFSET
    181 
        // TMP = TS + 16 is the loop bound. With 16 bytes or fewer remaining the bulk
        // loop is skipped and FROM is left untouched for the byte tail.
    182 	ADD	$16, TS, TMP	/* do 16-byte chunks if possible */
    183 	CMP	TMP, TE
    184 	BLS	_b1tail
    185 
        // Round FROM down to a word boundary, save TS (BR0 is R0), and load the word
        // at FROM as the carry-in for the first merge.
    186 	BIC	$3, FROM		/* align source */
    187 	MOVW	TS, savedts-4(SP)
    188 	MOVW	(FROM), BR0	/* prime first block register */
    189 
    190 _bu16loop:
    191 	CMP	TMP, TE
    192 	BLS	_bu1tail
    193 
        // The high part of BW3 comes from the previously loaded BR0; the four-word
        // load then replaces BR0-BR3 and moves FROM down by 16. The merges run from
        // BW3 down to BW0 because each BWk overwrites the register of BR(k+1) only
        // after that source word has been consumed by the merge above it.
    194 	MOVW	BR0<<LSHIFT, BW3
    195 	MOVM.DB.W (FROM), [BR0-BR3]
    196 	ORR	BR3>>RSHIFT, BW3
    197 
    198 	MOVW	BR3<<LSHIFT, BW2
    199 	ORR	BR2>>RSHIFT, BW2
    200 
    201 	MOVW	BR2<<LSHIFT, BW1
    202 	ORR	BR1>>RSHIFT, BW1
    203 
    204 	MOVW	BR1<<LSHIFT, BW0
    205 	ORR	BR0>>RSHIFT, BW0
    206 
        // Store the four merged words ending just below TE; TE moves down by 16.
    207 	MOVM.DB.W [BW0-BW3], (TE)
    208 	B	_bu16loop
    209 
        // Restore TS and add OFFSET to FROM, putting back the low bits that BIC
        // cleared so the byte tail continues from the correct source byte.
    210 _bu1tail:
    211 	MOVW	savedts-4(SP), TS
    212 	ADD	OFFSET, FROM
    213 	B	_b1tail
    214 
        // Forward copy with a word-aligned destination and a misaligned source.
        // TMP holds FROM & 3 (1, 2 or 3). The shift pairs match the backward case;
        // OFFSET is 3, 2 or 1 respectively, that is 4 minus the misalignment, because
        // the priming load below advances FROM by a full word.
    215 _funaligned:
    216 	CMP	$2, TMP
    217 
    218 	MOVW.LT	$8, RSHIFT		/* (R(n+1)<<24)|(R(n)>>8) */
    219 	MOVW.LT	$24, LSHIFT
    220 	MOVW.LT	$3, OFFSET
    221 
    222 	MOVW.EQ	$16, RSHIFT		/* (R(n+1)<<16)|(R(n)>>16) */
    223 	MOVW.EQ	$16, LSHIFT
    224 	MOVW.EQ	$2, OFFSET
    225 
    226 	MOVW.GT	$24, RSHIFT		/* (R(n+1)<<8)|(R(n)>>24) */
    227 	MOVW.GT	$8, LSHIFT
    228 	MOVW.GT	$1, OFFSET
    229 
        // TMP = TE - 16 is the loop bound. When TS is not below it the bulk loop is
        // skipped and FROM is left untouched for the byte tail.
    230 	SUB	$16, TE, TMP	/* do 16-byte chunks if possible */
    231 	CMP	TMP, TS
    232 	BHS	_f1tail
    233 
        // Round FROM down to a word boundary, save TE (FR3 is R8), and load the first
        // word into FR3 as the carry-in, advancing FROM by 4.
    234 	BIC	$3, FROM		/* align source */
    235 	MOVW	TE, savedte-4(SP)
    236 	MOVW.P	4(FROM), FR3	/* prime last block register, implicit write back */
    237 
    238 _fu16loop:
    239 	CMP	TMP, TS
    240 	BHS	_fu1tail
    241 
        // The low part of FW0 comes from the previously loaded FR3; the four-word
        // load then replaces FR0-FR3 and moves FROM up by 16. The merges run from
        // FW0 up to FW3 because each FW(k+1) overwrites the register of FRk only
        // after that source word has been consumed by the merge above it.
    242 	MOVW	FR3>>RSHIFT, FW0
    243 	MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3]
    244 	ORR	FR0<<LSHIFT, FW0
    245 
    246 	MOVW	FR0>>RSHIFT, FW1
    247 	ORR	FR1<<LSHIFT, FW1
    248 
    249 	MOVW	FR1>>RSHIFT, FW2
    250 	ORR	FR2<<LSHIFT, FW2
    251 
    252 	MOVW	FR2>>RSHIFT, FW3
    253 	ORR	FR3<<LSHIFT, FW3
    254 
        // Store the four merged words starting at TS; TS moves up by 16.
    255 	MOVM.IA.W [FW0,FW1,FW2,FW3], (TS)
    256 	B	_fu16loop
    257 
        // Restore TE and subtract OFFSET from FROM, undoing the part of the priming
        // word that was read ahead so the byte tail continues from the correct
        // source byte.
    258 _fu1tail:
    259 	MOVW	savedte-4(SP), TE
    260 	SUB	OFFSET, FROM
    261 	B	_f1tail
    262