Home | History | Annotate | Download | only in lib
      1 !   Copyright (C) 2008-2012 Imagination Technologies Ltd.
      2 
      3 	.text
      4 	.global	_memcpy
      5 	.type	_memcpy,function
      6 ! D1Ar1 dst (destination pointer)
      7 ! D0Ar2 src (source pointer)
      8 ! D1Ar3 cnt (number of bytes to copy)
      9 ! D0Re0 dst (returned to the caller)
     10 _memcpy:		! copy D1Ar3 bytes from D0Ar2 to D1Ar1; D0Re0 returns dst
     11 	CMP 	D1Ar3, #16		! flags are consumed by the BGE below
     12 	MOV 	A1.2, D0Ar2		! source pointer
     13 	MOV 	A0.2, D1Ar1		! destination pointer
     14 	MOV 	A0.3, D1Ar1		! for return value
     15 ! If there are fewer than 16 bytes to copy use the byte copy loop
     16 	BGE 	$Llong_copy		! taken when cnt >= 16
     17 
     18 $Lbyte_copy:		! also reached from the block copy paths to copy their tails
     19 ! Simply copy a byte at a time: D1Ar3 bytes from [A1.2] to [A0.2]
     20 	SUBS	TXRPT, D1Ar3, #1	! TXRPT = cnt - 1, and set flags
     21 	BLT	$Lend			! result negative (e.g. cnt == 0): nothing to copy
     22 $Lloop_byte:
     23 	GETB 	D1Re0, [A1.2++]		! load one byte, post-increment source
     24 	SETB 	[A0.2++], D1Re0		! store it, post-increment destination
     25 	BR	$Lloop_byte		! NOTE(review): repeat count presumably comes from TXRPT - confirm
     26 
     27 $Lend:
     28 ! Finally set return value and return
     29 	MOV 	D0Re0, A0.3		! A0.3 = original dst saved at entry
     30 	MOV 	PC, D1RtP		! return by loading PC from D1RtP
     31 
     32 $Llong_copy:		! reached from the entry BGE, i.e. with cnt >= 16
     33 	ANDS 	D1Ar5, D1Ar1, #7	! D1Ar5 = dst & 7: test destination alignment
     34 	BZ	$Laligned_dst		! dst is already 8 byte aligned
     35 
     36 ! The destination address is not 8 byte aligned. We will copy bytes from
     37 ! the source to the destination until the remaining data has an 8 byte
     38 ! destination address alignment (i.e. we should never copy more than 7
     39 ! bytes here).
     40 $Lalign_dst:
     41 	GETB 	D0Re0, [A1.2++]		! load one byte, post-increment source
     42 	ADD 	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
     43 	SUB 	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
     44 	SETB 	[A0.2++], D0Re0		! store it, post-increment destination
     45 	CMP 	D1Ar5, #8
     46 	BNE 	$Lalign_dst		! loop until (dst & 7) has counted up to 8
     47 
     48 ! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
     49 ! blocks, then jump to the unaligned copy loop or fall through to the aligned
     50 ! copy loop as appropriate.
     51 $Laligned_dst:
     52 	MOV	D0Ar4, A1.2		! D0Ar4 = current source address
     53 	LSR 	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
     54 	ANDS 	D0Ar4, D0Ar4, #7	! test source alignment
     55 	BNZ 	$Lunaligned_copy	! if unaligned, use unaligned copy loop
     56 
     57 ! Both source and destination are 8 byte aligned - the easy case.
     58 $Laligned_copy:
     59 	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
     60 	BZ	$Lbyte_copy		! fewer than 32 bytes left: the byte loop copies them all
     61 	SUB	TXRPT, D1Ar5, #1	! TXRPT = (number of 32 byte blocks) - 1
     62 
     63 $Laligned_32:		! each pass moves 32 bytes as four 8 byte GETL/SETL transfers
     64 	GETL 	D0Re0, D1Re0, [A1.2++]	! load bytes 0-7 of the block
     65 	GETL 	D0Ar6, D1Ar5, [A1.2++]	! load bytes 8-15; D1Ar5 is now data (count is in TXRPT)
     66 	SETL 	[A0.2++], D0Re0, D1Re0	! store bytes 0-7
     67 	SETL 	[A0.2++], D0Ar6, D1Ar5	! store bytes 8-15
     68 	GETL 	D0Re0, D1Re0, [A1.2++]	! load bytes 16-23
     69 	GETL 	D0Ar6, D1Ar5, [A1.2++]	! load bytes 24-31
     70 	SETL 	[A0.2++], D0Re0, D1Re0	! store bytes 16-23
     71 	SETL 	[A0.2++], D0Ar6, D1Ar5	! store bytes 24-31
     72 	BR	$Laligned_32		! NOTE(review): repeat count presumably comes from TXRPT - confirm
     73 
     74 ! If there are any remaining bytes use the byte copy loop, otherwise we are done
     75 	ANDS 	D1Ar3, D1Ar3, #0x1f	! D1Ar3 = bytes left over after the 32 byte blocks
     76 	BNZ	$Lbyte_copy
     77 	B	$Lend
     78 
     79 ! The destination is 8 byte aligned but the source is not, and there are 8
     80 ! or more bytes to be copied.
     81 $Lunaligned_copy:
     82 ! Adjust the source pointer (A1.2) to the 8 byte boundary before its
     83 ! current value
     84 	MOV 	D0Ar4, A1.2		! D0Ar4 = source address (will be rounded down)
     85 	MOV 	D0Ar6, A1.2		! D0Ar6 = source address (kept for the subtraction)
     86 	ANDMB 	D0Ar4, D0Ar4, #0xfff8	! clear bits 2..0; NOTE(review): ANDMB assumed to leave the upper 16 bits intact - confirm
     87 	MOV 	A1.2, D0Ar4		! A1.2 = source rounded down to 8 bytes
     88 ! Save the number of bytes of mis-alignment in D0Ar4 for use later
     89 	SUBS 	D0Ar6, D0Ar6, D0Ar4	! D0Ar6 = source - rounded source, and set flags
     90 	MOV	D0Ar4, D0Ar6		! D0Ar4 keeps the byte offset; it is added back at $Lunaligned_end
     91 ! if there is no mis-alignment after all, use the aligned copy loop
     92 	BZ 	$Laligned_copy		! tests the SUBS result; NOTE(review): the only visible entry is the BNZ taken when (src & 7) != 0, so this looks defensive
     93 
     94 ! prefetch 8 bytes
     95 	GETL 	D0Re0, D1Re0, [A1.2]	! the aligned 8 bytes containing the first source byte; A1.2 unchanged
     96 
     97 	SUB	TXRPT, D1Ar5, #1	! TXRPT = (number of 8 byte blocks from $Laligned_dst) - 1
     98 
     99 ! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
    100 ! 4 bytes, and more than 4 bytes.
    101 	CMP 	D0Ar6, #4		! D0Ar6 = mis-alignment in bytes (non-zero here)
    102 	BLT 	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
    103 	BZ 	$Lunaligned_4		! use 4 byte mis-alignment loop
    104 
    105 ! The mis-alignment is more than 4 bytes
    106 $Lunaligned_5_6_7:
    107 	SUB 	D0Ar6, D0Ar6, #4	! D0Ar6 = byte offset within the second 32-bit word
    108 ! Calculate the bit offsets required for the shift operations necessary
    109 ! to align the data.
    110 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
    111 	MULW 	D0Ar6, D0Ar6, #8	! bytes -> bits
    112 	MOV	D1Ar5, #32
    113 	SUB	D1Ar5, D1Ar5, D0Ar6	! D1Ar5 = 32 - bit offset
    114 ! Move data 4 bytes before we enter the main loop
    115 	MOV 	D0Re0, D1Re0		! carry only the second prefetched word; the first is not used on this path
    116 
    117 $Lloop_5_6_7:		! D0Re0 = word carried from the previous load
    118 	GETL 	D0Ar2, D1Ar1, [++A1.2]	! step A1.2 to the next aligned 8 bytes and load them (args reused as scratch; dst is in A0.3)
    119 ! form 64-bit data in D0Re0, D1Re0
    120 	LSR 	D0Re0, D0Re0, D0Ar6	! carried word shifted down by the bit offset
    121 	MOV 	D1Re0, D0Ar2		! D1Re0 = first new word
    122 	LSL 	D1Re0, D1Re0, D1Ar5	! first new word shifted up by (32 - bit offset)
    123 	ADD 	D0Re0, D0Re0, D1Re0	! D0Re0 = first output word
    124 
    125 	LSR 	D0Ar2, D0Ar2, D0Ar6	! first new word shifted down by the bit offset
    126 	LSL 	D1Re0, D1Ar1, D1Ar5	! second new word shifted up by (32 - bit offset)
    127 	ADD 	D1Re0, D1Re0, D0Ar2	! D1Re0 = second output word
    128 
    129 	SETL 	[A0.2++], D0Re0, D1Re0	! store 8 bytes to the aligned destination
    130 	MOV 	D0Re0, D1Ar1		! carry the second new word into the next pass
    131 	BR	$Lloop_5_6_7		! NOTE(review): repeat count presumably comes from TXRPT - confirm
    132 
    133 	B 	$Lunaligned_end		! loop finished: handle any tail bytes
    134 
    135 $Lunaligned_1_2_3:
    136 ! Calculate the bit offsets required for the shift operations necessary
    137 ! to align the data.
    138 ! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
    139 	MULW 	D0Ar6, D0Ar6, #8	! bytes -> bits
    140 	MOV	D1Ar5, #32
    141 	SUB	D1Ar5, D1Ar5, D0Ar6	! D1Ar5 = 32 - bit offset
    142 
    143 $Lloop_1_2_3:		! D0Re0, D1Re0 = the two words carried from the previous load
    144 ! form 64-bit data in D0Re0,D1Re0
    145 	LSR 	D0Re0, D0Re0, D0Ar6	! first carried word shifted down by the bit offset
    146 	LSL 	D1Ar1, D1Re0, D1Ar5	! second carried word shifted up by (32 - bit offset)
    147 	ADD 	D0Re0, D0Re0, D1Ar1	! D0Re0 = first output word
    148 	MOV	D0Ar2, D1Re0		! D0Ar2 = second carried word
    149 	LSR 	D0FrT, D0Ar2, D0Ar6	! D0FrT = second carried word shifted down by the bit offset
    150 	GETL 	D0Ar2, D1Ar1, [++A1.2]	! step A1.2 to the next aligned 8 bytes and load them
    151 
    152 	MOV 	D1Re0, D0Ar2		! D1Re0 = first new word
    153 	LSL 	D1Re0, D1Re0, D1Ar5	! first new word shifted up by (32 - bit offset)
    154 	ADD 	D1Re0, D1Re0, D0FrT	! D1Re0 = second output word
    155 
    156 	SETL 	[A0.2++], D0Re0, D1Re0	! store 8 bytes to the aligned destination
    157 	MOV 	D0Re0, D0Ar2		! carry both new words into the next pass
    158 	MOV 	D1Re0, D1Ar1
    159 	BR	$Lloop_1_2_3		! NOTE(review): repeat count presumably comes from TXRPT - confirm
    160 
    161 	B 	$Lunaligned_end		! loop finished: handle any tail bytes
    162 
    163 ! The 4 byte mis-alignment case - this does not require any shifting, just a
    164 ! shuffling of registers.
    165 $Lunaligned_4:
    166 	MOV 	D0Re0, D1Re0		! carry only the second prefetched word
    167 $Lloop_4:
    168 	GETL 	D0Ar2, D1Ar1, [++A1.2]	! step A1.2 to the next aligned 8 bytes and load them
    169 	MOV 	D1Re0, D0Ar2		! output pair = carried word, then first new word
    170 	SETL 	[A0.2++], D0Re0, D1Re0	! store 8 bytes to the aligned destination
    171 	MOV 	D0Re0, D1Ar1		! carry the second new word into the next pass
    172 	BR	$Lloop_4		! falls through to $Lunaligned_end when the loop ends
    173 
    174 $Lunaligned_end:
    175 ! If there are no remaining bytes to copy, we are done.
    176 	ANDS 	D1Ar3, D1Ar3, #7	! D1Ar3 = bytes left over after the 8 byte blocks
    177 	BZ	$Lend
    178 ! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
    179 ! address of the remaining bytes, and fall through to the byte copy loop.
    180 	MOV 	D0Ar6, A1.2		! A1.2 = rounded-down address of the last 8 bytes loaded
    181 	ADD 	D1Ar5, D0Ar4, D0Ar6	! add back the mis-alignment saved in D0Ar4
    182 	MOV 	A1.2, D1Ar5		! A1.2 = address of the first uncopied source byte
    183 	B	$Lbyte_copy		! copy the last D1Ar3 bytes one at a time
    184 
    185 	.size _memcpy,.-_memcpy
    186