Home | History | Annotate | Download | only in lib
      1 /*
      2  * Copyright (C) 2002 Paul Mackerras, IBM Corp.
      3  *
      4  * This program is free software; you can redistribute it and/or
      5  * modify it under the terms of the GNU General Public License
      6  * as published by the Free Software Foundation; either version
      7  * 2 of the License, or (at your option) any later version.
      8  */
      9 #include <asm/processor.h>
     10 #include <asm/ppc_asm.h>
     11 
      12 	.align	7
/*
 * void *memcpy(void *to, const void *from, size_t n)
 *
 * 64-bit PowerPC memcpy (kernel).
 * In:   r3 = dest, r4 = src, r5 = length in bytes
 * Out:  r3 = original dest (reloaded from the stack at every exit)
 * Scratch: r0, r6-r12, ctr, cr1, cr6, cr7
 *
 * NOTE(review): each line below starts with a line-number column that is
 * residue from the code-browser listing this file was extracted from; it
 * is preserved untouched here.  The final `blr` after the last `4:` exit
 * may have been truncated by the same extraction — verify against the
 * upstream arch/powerpc/lib source.
 */
      13 _GLOBAL(memcpy)
/* On CPUs with CPU_FTR_VMX_COPY this body is patched into a branch to
   the VMX-accelerated memcpy_power7; everyone else falls through and
   stashes dest for the return value.  48(r1) is a slot in the caller's
   stack frame — NOTE(review): presumably the ABI parameter save area;
   confirm against the kernel's STK_PARAM definition. */
      14 BEGIN_FTR_SECTION
      15 	std	r3,48(r1)	/* save destination pointer for return value */
      16 FTR_SECTION_ELSE
      17 	b	memcpy_power7
      18 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
/* Mirror the low 4 bits of the length into cr7: bit 0 = 8 bytes,
   bit 1 = 4, bit 2 = 2, bit 3 = 1.  The bf/bt cr7*4+n tests in the
   tail-copy code below key off these bits. */
      19 	PPC_MTOCRF(0x01,r5)
/* cr1 = (len < 16): small copies take the .Lshort_copy tail path. */
      20 	cmpldi	cr1,r5,16
      21 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
      22 	andi.	r6,r6,7
/* Prefetch the first source cache line. */
      23 	dcbt	0,r4
      24 	blt	cr1,.Lshort_copy
      25 /* Below we want to nop out the bne if we're on a CPU that has the
      26    CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
      27    cleared.
      28    At the time of writing the only CPU that has this combination of bits
      29    set is Power6. */
      30 BEGIN_FTR_SECTION
      31 	nop
      32 FTR_SECTION_ELSE
      33 	bne	.Ldst_unaligned
      34 ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
      35                     CPU_FTR_UNALIGNED_LD_STD)
/* Destination is 8-byte aligned here (or the CPU handles unaligned
   stores well enough that we didn't bother aligning it). */
      36 .Ldst_aligned:
/* Bias dest by -16 so the stdu/std offsets in the main loop line up. */
      37 	addi	r3,r3,-16
/* Only CPUs WITHOUT CPU_FTR_UNALIGNED_LD_STD take the shift-and-merge
   .Lsrc_unaligned path; on others this test is patched out. */
      38 BEGIN_FTR_SECTION
      39 	andi.	r0,r4,7
      40 	bne	.Lsrc_unaligned
      41 END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
/* Aligned main path: r7 = number of 16-byte chunks; andi. leaves
   cr0.eq set when there is no sub-8-byte tail (tested by beq 3f). */
      42 	srdi	r7,r5,4
      43 	ld	r9,0(r4)
      44 	addi	r4,r4,-8
      45 	mtctr	r7
      46 	andi.	r5,r5,7
/* Length bit 3 set => odd number of doublewords: shift the software
   pipeline by one slot (copy one doubleword before entering at 2:). */
      47 	bf	cr7*4+0,2f
      48 	addi	r3,r3,8
      49 	addi	r4,r4,8
      50 	mr	r8,r9
      51 	blt	cr1,3f
/* Main loop: 16 bytes per iteration, software-pipelined — loads in r8/r9
   run one step ahead of the matching stores. */
      52 1:	ld	r9,8(r4)
      53 	std	r8,8(r3)
      54 2:	ldu	r8,16(r4)
      55 	stdu	r9,16(r3)
      56 	bdnz	1b
      57 3:	std	r8,8(r3)
      58 	beq	3f
      59 	addi	r3,r3,16
/* Copy the remaining 0-7 bytes: word / halfword / byte as selected by
   cr7 bits 1/2/3 (length bits 4/2/1). */
      60 .Ldo_tail:
      61 	bf	cr7*4+1,1f
      62 	lwz	r9,8(r4)
      63 	addi	r4,r4,4
      64 	stw	r9,0(r3)
      65 	addi	r3,r3,4
      66 1:	bf	cr7*4+2,2f
      67 	lhz	r9,8(r4)
      68 	addi	r4,r4,2
      69 	sth	r9,0(r3)
      70 	addi	r3,r3,2
      71 2:	bf	cr7*4+3,3f
      72 	lbz	r9,8(r4)
      73 	stb	r9,0(r3)
      74 3:	ld	r3,48(r1)	/* return dest pointer */
      75 	blr
      76 
/*
 * Source not 8-byte aligned (and this CPU dislikes unaligned ld/std):
 * round the source pointer down (subf r4,r0,r4), load aligned
 * doublewords, and merge adjacent pairs with sld/srd:
 *   r10 = 8 * (source misalignment) = left-shift count
 *   r11 = 64 - r10                  = right-shift count
 *   r7  = 16-byte chunk count for ctr; cr6 picks the startup variant
 *   r5  = total tail bytes left after the merged stores
 */
      77 .Lsrc_unaligned:
      78 	srdi	r6,r5,3
      79 	addi	r5,r5,-16
      80 	subf	r4,r0,r4
      81 	srdi	r7,r5,4
      82 	sldi	r10,r0,3
      83 	cmpdi	cr6,r6,3
      84 	andi.	r5,r5,7
      85 	mtctr	r7
      86 	subfic	r11,r10,64
      87 	add	r5,r5,r0
      88 
/* Length bit 3 chooses the odd- vs even-doubleword startup sequence. */
      89 	bt	cr7*4+0,0f
      90 
      91 	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
      92 	ld	r0,8(r4)
      93 	sld	r6,r9,r10
      94 	ldu	r9,16(r4)
      95 	srd	r7,r0,r11
      96 	sld	r8,r0,r10
      97 	or	r7,r7,r6
      98 	blt	cr6,4f
      99 	ld	r0,8(r4)
     100 	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
     101 	b	2f
     102 
     103 0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
     104 	ldu	r9,8(r4)
     105 	sld	r8,r0,r10
     106 	addi	r3,r3,-8
     107 	blt	cr6,5f
     108 	ld	r0,8(r4)
     109 	srd	r12,r9,r11
     110 	sld	r6,r9,r10
     111 	ldu	r9,16(r4)
     112 	or	r12,r8,r12
     113 	srd	r7,r0,r11
     114 	sld	r8,r0,r10
     115 	addi	r3,r3,16
     116 	beq	cr6,3f
     117 
     118 	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
/* Merged-copy main loop: two merged doublewords (16 bytes) stored per
   iteration, again pipelined one load ahead. */
     119 1:	or	r7,r7,r6
     120 	ld	r0,8(r4)
     121 	std	r12,8(r3)
     122 2:	srd	r12,r9,r11
     123 	sld	r6,r9,r10
     124 	ldu	r9,16(r4)
     125 	or	r12,r8,r12
     126 	stdu	r7,16(r3)
     127 	srd	r7,r0,r11
     128 	sld	r8,r0,r10
     129 	bdnz	1b
     130 
/* Drain the pipeline: store the last merged doublewords.  cr0 still
   holds the andi. r5,7 result from above — beq 4f exits via the `4:`
   label at the end of .Lshort_copy when no tail bytes remain. */
     131 3:	std	r12,8(r3)
     132 	or	r7,r7,r6
     133 4:	std	r7,16(r3)
     134 5:	srd	r12,r9,r11
     135 	or	r12,r8,r12
     136 	std	r12,24(r3)
     137 	beq	4f
     138 	cmpwi	cr1,r5,8
     139 	addi	r3,r3,32
     140 	sld	r9,r9,r10
     141 	ble	cr1,6f
     142 	ld	r0,8(r4)
     143 	srd	r7,r0,r11
     144 	or	r9,r7,r9
/* Tail bytes sit at the most-significant end of r9: rotate the wanted
   bytes into the low end and store word / halfword / byte per cr7. */
     145 6:
     146 	bf	cr7*4+1,1f
     147 	rotldi	r9,r9,32
     148 	stw	r9,0(r3)
     149 	addi	r3,r3,4
     150 1:	bf	cr7*4+2,2f
     151 	rotldi	r9,r9,16
     152 	sth	r9,0(r3)
     153 	addi	r3,r3,2
     154 2:	bf	cr7*4+3,3f
     155 	rotldi	r9,r9,8
     156 	stb	r9,0(r3)
     157 3:	ld	r3,48(r1)	/* return dest pointer */
     158 	blr
     159 
/*
 * Destination not 8-byte aligned: copy the 1-7 bytes needed to align it
 * (count in r6, its low bits mirrored into cr7), with r7 tracking how
 * many have been copied, then rejoin the aligned path with cr7/cr1
 * refreshed from the remaining length.
 */
     160 .Ldst_unaligned:
     161 	PPC_MTOCRF(0x01,r6)		# put #bytes to 8B bdry into cr7
     162 	subf	r5,r6,r5
     163 	li	r7,0
     164 	cmpldi	cr1,r5,16
     165 	bf	cr7*4+3,1f
     166 	lbz	r0,0(r4)
     167 	stb	r0,0(r3)
     168 	addi	r7,r7,1
     169 1:	bf	cr7*4+2,2f
     170 	lhzx	r0,r7,r4
     171 	sthx	r0,r7,r3
     172 	addi	r7,r7,2
     173 2:	bf	cr7*4+1,3f
     174 	lwzx	r0,r7,r4
     175 	stwx	r0,r7,r3
/* Reload cr7 with the low bits of the REMAINING length, advance both
   pointers past the alignment bytes, and rejoin the aligned copy. */
     176 3:	PPC_MTOCRF(0x01,r5)
     177 	add	r4,r6,r4
     178 	add	r3,r6,r3
     179 	b	.Ldst_aligned
     180 
/* Length < 16: copy 8/4/2/1 bytes as selected by cr7 (the low bits of
   the length).  Neither pointer needs any particular alignment here. */
     181 .Lshort_copy:
     182 	bf	cr7*4+0,1f
     183 	lwz	r0,0(r4)
     184 	lwz	r9,4(r4)
     185 	addi	r4,r4,8
     186 	stw	r0,0(r3)
     187 	stw	r9,4(r3)
     188 	addi	r3,r3,8
     189 1:	bf	cr7*4+1,2f
     190 	lwz	r0,0(r4)
     191 	addi	r4,r4,4
     192 	stw	r0,0(r3)
     193 	addi	r3,r3,4
     194 2:	bf	cr7*4+2,3f
     195 	lhz	r0,0(r4)
     196 	addi	r4,r4,2
     197 	sth	r0,0(r3)
     198 	addi	r3,r3,2
     199 3:	bf	cr7*4+3,4f
     200 	lbz	r0,0(r4)
     201 	stb	r0,0(r3)
/* Common exit (also the `4f` target of the merged-copy path above). */
     202 4:	ld	r3,48(r1)	/* return dest pointer */
    203 	blr
    204