Home | History | Annotate | Download | only in sparc
      1 /*
      2  * Clip testing in SPARC assembly
      3  */
      4 
      5 #if __arch64__
        /*
         * 64-bit build: pointer fields are 8 bytes wide, so LDPTR is the
         * 64-bit load (ldx).  The V4F_* values are byte offsets of the
         * GLvector4f fields used by the routines in this file; they
         * presumably have to mirror the C declaration of GLvector4f --
         * verify against that declaration whenever it changes.
         */
      6 #define LDPTR		ldx
      7 #define V4F_DATA	0x00
      8 #define V4F_START	0x08
      9 #define V4F_COUNT	0x10
     10 #define V4F_STRIDE	0x14
     11 #define V4F_SIZE	0x18
     12 #define V4F_FLAGS	0x1c
     13 #else
        /*
         * 32-bit build: pointer fields are 4 bytes wide, so LDPTR is the
         * 32-bit load (ld) and every field after the two pointers moves
         * down accordingly.
         */
     14 #define LDPTR		ld
     15 #define V4F_DATA	0x00
     16 #define V4F_START	0x04
     17 #define V4F_COUNT	0x08
     18 #define V4F_STRIDE	0x0c
     19 #define V4F_SIZE	0x10
     20 #define V4F_FLAGS	0x14
     21 #endif
     22 
        /*
         * Bit masks with the low N bits set (N = 1..4).  Only VEC_SIZE_4 is
         * used in this file: it is ORed into the word at V4F_FLAGS of the
         * projected vector.
         */
     23 #define VEC_SIZE_1   	1
     24 #define VEC_SIZE_2   	3
     25 #define VEC_SIZE_3   	7
     26 #define VEC_SIZE_4   	15
     27 
     28         .register %g2, #scratch
     29         .register %g3, #scratch
        	/* The two directives above declare %g2 and %g3 as scratch
        	 * registers; both routines in this file use them as
        	 * temporaries.
        	 */
     30 
     31 	.text
     32 	.align		64
     33 
        	/* Single-precision 1.0.  clip_table must stay immediately
        	 * after this word: both routines compute the table address
        	 * as one_dot_zero + 4.
        	 */
     34 one_dot_zero:
     35 	.word		0x3f800000	/* 1.0f */
     36 
     37 	/* This trick is shamelessly stolen from the x86
     38 	 * Mesa asm.  Very clever, and we can do it too
     39 	 * since we have the necessary add with carry
     40 	 * instructions on Sparc.
     41 	 */
        	/* 128-entry byte table mapping a 7-bit index to the clip
        	 * mask of one point.  The routines below build the index
        	 * from the raw IEEE bit patterns of x, y, z, w with
        	 * addcc/subcc/addx carry chains:
        	 *
        	 *   bit 6: sign bit of w
        	 *   bit 5: sign bit of z
        	 *   bit 4: magnitude of w < magnitude of z
        	 *   bit 3: sign bit of y
        	 *   bit 2: magnitude of w < magnitude of y
        	 *   bit 1: sign bit of x
        	 *   bit 0: magnitude of w < magnitude of x
        	 *
        	 * "Magnitude" is the bit pattern with the sign shifted out,
        	 * compared as an unsigned integer.  Each row below holds the
        	 * eight entries for one value of bits 6..3; the second half
        	 * of the table (rows 9-16) is used when the sign bit of w
        	 * is set.
        	 */
     42 clip_table:
     43 	.byte	 0,  1,  0,  2,  4,  5,  4,  6
     44 	.byte	 0,  1,  0,  2,  8,  9,  8, 10
     45 	.byte	32, 33, 32, 34, 36, 37, 36, 38
     46 	.byte	32, 33, 32, 34, 40, 41, 40, 42
     47 	.byte	 0,  1,  0,  2,  4,  5,  4,  6
     48 	.byte	 0,  1,  0,  2,  8,  9,  8, 10
     49 	.byte	16, 17, 16, 18, 20, 21, 20, 22
     50 	.byte	16, 17, 16, 18, 24, 25, 24, 26
     51 	.byte	63, 61, 63, 62, 55, 53, 55, 54
     52 	.byte	63, 61, 63, 62, 59, 57, 59, 58
     53 	.byte	47, 45, 47, 46, 39, 37, 39, 38
     54 	.byte	47, 45, 47, 46, 43, 41, 43, 42
     55 	.byte	63, 61, 63, 62, 55, 53, 55, 54
     56 	.byte	63, 61, 63, 62, 59, 57, 59, 58
     57 	.byte	31, 29, 31, 30, 23, 21, 23, 22
     58 	.byte	31, 29, 31, 30, 27, 25, 27, 26
     59 
     60 /* Arguments of both entry points below, held in %i0-%i5 after the save:
     61    GLvector4f *clip_vec, GLvector4f *proj_vec, GLubyte clipMask[],
     62    GLubyte *orMask, GLubyte *andMask, GLboolean viewport_z_enable */
     63 
     64 	.align		64
        	/* Trampoline that returns immediately.  It is called only for
        	 * the side effect of the call instruction, which leaves the
        	 * address of the call in %o7; the callers use the delay slot
        	 * of that call to turn %o7 into the address of one_dot_zero
        	 * without needing an absolute relocation.
        	 */
     65 __pc_tramp:
     66 	retl
     67 	 nop
     68 
     69 	.globl		_mesa_sparc_cliptest_points4
        	/*
        	 * _mesa_sparc_cliptest_points4
        	 *
        	 * In (after save):
        	 *   %i0  clip_vec   source vector; its count, byte stride and
        	 *                   start pointer are read
        	 *   %i1  proj_vec   destination vector; its start pointer is
        	 *                   read, its flags/size/count are written
        	 *   %i2  clipMask   one output byte per point
        	 *   %i3  orMask     in/out byte
        	 *   %i4  andMask    in/out byte
        	 *   %i5  sixth argument; never read, the register is reused
        	 *        as the destination pointer
        	 *
        	 * For every point (x, y, z, w as four 32-bit floats) the clip
        	 * mask is looked up in clip_table and stored to clipMask[i].
        	 * The projected point is written 16 bytes after the previous
        	 * one:
        	 *   mask == 0:  (x * 1/w, y * 1/w, z * 1/w, 1/w)
        	 *   mask != 0:  (0, 0, 0, 1.0)
        	 *
        	 * On exit *orMask has every mask ORed in; *andMask is 0 if at
        	 * least one point had a zero mask, otherwise the incoming
        	 * value ANDed with every mask.  With count == 0 both bytes
        	 * are written back unchanged and no point is touched.
        	 *
        	 * Returns proj_vec in %o0.
        	 */
     70 _mesa_sparc_cliptest_points4:
        	/* The frame must hold the register window save area: 16
        	 * registers of 4 bytes on 32-bit, 16 registers of 8 bytes
        	 * (plus six argument slots, 176 bytes minimum) on 64-bit.
        	 * A 64-byte frame on 64-bit would let a window spill of this
        	 * frame overwrite the save area of the caller.
        	 */
        #if __arch64__
        	save		%sp, -176, %sp
        #else
     71 	save		%sp, -64, %sp
        #endif
        	/* %o7 = address of the call; the delay slot rebases it to
        	 * the address of one_dot_zero.
        	 */
     72 	call		__pc_tramp
     73 	 sub		%o7, (. - one_dot_zero - 4), %g1
     74 	ld		[%g1 + 0x0], %f4
     75 	add		%g1, 0x4, %g1
     76 
     77 	ld		[%i0 + V4F_STRIDE], %l1
     78 	ld		[%i0 + V4F_COUNT], %l3
     79 	LDPTR		[%i0 + V4F_START], %i0
     80 	LDPTR		[%i1 + V4F_START], %i5
        	/* Pack the two incoming mask bytes into one register. */
     81 	ldub		[%i3], %g2
     82 	ldub		[%i4], %g3
     83 	sll		%g3, 8, %g3
     84 	or		%g2, %g3, %g2
     85 
        	/* NOTE(review): size is set to 3 although the flags get
        	 * VEC_SIZE_4 and all four components are written below --
        	 * confirm this is intended.
        	 */
     86 	ld		[%i1 + V4F_FLAGS], %g3
     87 	or		%g3, VEC_SIZE_4, %g3
     88 	st		%g3, [%i1 + V4F_FLAGS]
     89 	mov		3, %g3
     90 	st		%g3, [%i1 + V4F_SIZE]
     91 	st		%l3, [%i1 + V4F_COUNT]
     92 	clr		%l2
     93 	clr		%l0
        	/* The loop below is bottom-tested and exits on equality, so
        	 * an empty vector has to bypass it entirely.
        	 */
        	cmp		%l3, 0
        	be		4f
        	 nop
     94 
     95 	/* l0:	i
     96 	 * l3:	count
     97 	 * l1:	stride
     98 	 * l2:	c
     99 	 * g2:	(tmpAndMask << 8) | tmpOrMask
    100 	 * g1:	clip_table
    101 	 * i0:	from[stride][i]
    102 	 * i2:	clipMask
    103 	 * i5:	vProj[4][i]
    104 	 */
    105 
        	/* Build the 7-bit clip_table index in %g3.  Each addcc
        	 * doubles a raw float bit pattern, moving its sign bit into
        	 * the carry; each subcc sets the carry when the doubled w
        	 * pattern is below the doubled coordinate pattern.  Every
        	 * addx shifts %g3 left by one and appends the carry.  The
        	 * 1/w divide is started early so it overlaps the integer
        	 * work.
        	 */
    106 1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
    107 	ld		[%i0 + 0x0c], %g5	! LSU	Group
    108 	ld		[%i0 + 0x08], %g4	! LSU	Group
    109 	fdivs		%f4, %f3, %f8		! FGM
    110 	addcc		%g5, %g5, %g5		! IEU1	Group
    111 	addx		%g0, 0x0, %g3		! IEU1	Group
    112 	addcc		%g4, %g4, %g4		! IEU1	Group
    113 	addx		%g3, %g3, %g3		! IEU1	Group
    114 	subcc		%g5, %g4, %g0		! IEU1	Group
    115 	ld		[%i0 + 0x04], %g4	! LSU	Group
    116 	addx		%g3, %g3, %g3		! IEU1	Group
    117 	addcc		%g4, %g4, %g4		! IEU1	Group
    118 	addx		%g3, %g3, %g3		! IEU1	Group
    119 	subcc		%g5, %g4, %g0		! IEU1	Group
    120 	ld		[%i0 + 0x00], %g4	! LSU	Group
    121 	addx		%g3, %g3, %g3		! IEU1	Group
    122 	addcc		%g4, %g4, %g4		! IEU1	Group
    123 	addx		%g3, %g3, %g3		! IEU1	Group
    124 	subcc		%g5, %g4, %g0		! IEU1	Group
    125 	addx		%g3, %g3, %g3		! IEU1	Group
    126 	ldub		[%g1 + %g3], %g3	! LSU	Group
    127 	cmp		%g3, 0			! IEU1	Group, stall
        	/* The mask byte is stored from the delay slot, so it is
        	 * written on both paths.
        	 */
    128 	be		2f			! CTI
    129 	 stb		%g3, [%i2]		! LSU
        	/* Clipped point: count it, OR the mask into the low byte of
        	 * %g2, AND it into the high byte (the 0xff keeps the low
        	 * byte intact), and emit (0, 0, 0, 1.0).
        	 */
    130 	sll		%g3, 8, %g4		! IEU1	Group
    131 	add		%l2, 1, %l2		! IEU0
    132 	st		%g0, [%i5 + 0x00]	! LSU
    133 	or		%g4, 0xff, %g4		! IEU0	Group
    134 	or		%g2, %g3, %g2		! IEU1
    135 	st		%g0, [%i5 + 0x04]	! LSU
    136 	and		%g2, %g4, %g2		! IEU0	Group
    137 	st		%g0, [%i5 + 0x08]	! LSU
    138 	b		3f			! CTI
    139 	 st		%f4, [%i5 + 0x0c]	! LSU	Group
        	/* Unclipped point: scale x, y, z by 1/w and store 1/w as
        	 * the fourth component.
        	 */
    140 2:	ld		[%i0 + 0x00], %f0	! LSU	Group
    141 	ld		[%i0 + 0x04], %f1	! LSU	Group
    142 	ld		[%i0 + 0x08], %f2	! LSU	Group
    143 	fmuls		%f0, %f8, %f0		! FGM
    144 	st		%f0, [%i5 + 0x00]	! LSU	Group
    145 	fmuls		%f1, %f8, %f1		! FGM
    146 	st		%f1, [%i5 + 0x04]	! LSU	Group
    147 	fmuls		%f2, %f8, %f2		! FGM
    148 	st		%f2, [%i5 + 0x08]	! LSU	Group
    149 	st		%f8, [%i5 + 0x0c]	! LSU	Group
        	/* Advance the destination, the index and the mask pointer;
        	 * the source pointer advances by the stride in the delay
        	 * slot.
        	 */
    150 3:	add		%i5, 0x10, %i5		! IEU1
    151 	add		%l0, 1, %l0		! IEU0	Group
    152 	add		%i2, 1, %i2		! IEU0	Group
    153 	cmp		%l0, %l3		! IEU1	Group
    154 	bne		1b			! CTI
    155 	 add		%i0, %l1, %i0		! IEU0	Group
        	/* Write back the OR byte, then the AND byte.  The annulled
        	 * branch clears the AND byte only when fewer than count
        	 * points were clipped; both counters are unsigned, hence
        	 * blu.
        	 */
        4:
    156 	stb		%g2, [%i3]		! LSU
    157 	srl		%g2, 8, %g3		! IEU0	Group
    158 	cmp		%l2, %l3		! IEU1	Group
    159 	blu,a		1f			! CTI
    160 	 clr		%g3			! IEU0
    161 1:	stb		%g3, [%i4]		! LSU	Group
    162 	ret					! CTI	Group
    163 	 restore	%i1, 0x0, %o0
    164 
    165 	.globl		_mesa_sparc_cliptest_points4_np
        	/*
        	 * _mesa_sparc_cliptest_points4_np
        	 *
        	 * Variant of the clip test that writes no projected points.
        	 *
        	 * In (after save):
        	 *   %i0  clip_vec   source vector; its count, byte stride and
        	 *                   start pointer are read
        	 *   %i1  proj_vec   never dereferenced, only returned
        	 *   %i2  clipMask   one output byte per point
        	 *   %i3  orMask     in/out byte
        	 *   %i4  andMask    in/out byte
        	 *   %i5  sixth argument; never read
        	 *
        	 * For every point (x, y, z, w as four 32-bit floats) the clip
        	 * mask is looked up in clip_table and stored to clipMask[i].
        	 *
        	 * On exit *orMask has every mask ORed in; *andMask is 0 if at
        	 * least one point had a zero mask, otherwise the incoming
        	 * value ANDed with every mask.  With count == 0 both bytes
        	 * are written back unchanged and no point is touched.
        	 *
        	 * Returns the value passed in %i1.
        	 * NOTE(review): confirm callers expect proj_vec rather than
        	 * clip_vec as the return value.
        	 */
    166 _mesa_sparc_cliptest_points4_np:
        	/* The frame must hold the register window save area: 16
        	 * registers of 4 bytes on 32-bit, 16 registers of 8 bytes
        	 * (plus six argument slots, 176 bytes minimum) on 64-bit.
        	 * A 64-byte frame on 64-bit would let a window spill of this
        	 * frame overwrite the save area of the caller.
        	 */
        #if __arch64__
        	save		%sp, -176, %sp
        #else
    167 	save		%sp, -64, %sp
        #endif
    168 
        	/* %o7 = address of the call; the delay slot rebases it to
        	 * the address of one_dot_zero, and the add that follows
        	 * steps over that word to reach clip_table.
        	 */
    169 	call		__pc_tramp
    170 	 sub		%o7, (. - one_dot_zero - 4), %g1
    171 	add		%g1, 0x4, %g1
    172 
    173 	ld		[%i0 + V4F_STRIDE], %l1
    174 	ld		[%i0 + V4F_COUNT], %l3
    175 	LDPTR		[%i0 + V4F_START], %i0
        	/* Pack the two incoming mask bytes into one register. */
    176 	ldub		[%i3], %g2
    177 	ldub		[%i4], %g3
    178 	sll		%g3, 8, %g3
    179 	or		%g2, %g3, %g2
    180 
    181 	clr		%l2
    182 	clr		%l0
        	/* The loop below is bottom-tested and exits on equality, so
        	 * an empty vector has to bypass it entirely.
        	 */
        	cmp		%l3, 0
        	be		3f
        	 nop
    183 
    184 	/* l0:	i
    185 	 * l3:	count
    186 	 * l1:	stride
    187 	 * l2:	c
    188 	 * g2:	(tmpAndMask << 8) | tmpOrMask
    189 	 * g1:	clip_table
    190 	 * i0:	from[stride][i]
    191 	 * i2:	clipMask
    192 	 */
    193 
        	/* Build the 7-bit clip_table index in %g3.  Each addcc
        	 * doubles a raw float bit pattern, moving its sign bit into
        	 * the carry; each subcc sets the carry when the doubled w
        	 * pattern is below the doubled coordinate pattern.  Every
        	 * addx shifts %g3 left by one and appends the carry.
        	 */
    194 1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
    195 	ld		[%i0 + 0x08], %g4	! LSU	Group
    196 	addcc		%g5, %g5, %g5		! IEU1	Group
    197 	addx		%g0, 0x0, %g3		! IEU1	Group
    198 	addcc		%g4, %g4, %g4		! IEU1	Group
    199 	addx		%g3, %g3, %g3		! IEU1	Group
    200 	subcc		%g5, %g4, %g0		! IEU1	Group
    201 	ld		[%i0 + 0x04], %g4	! LSU	Group
    202 	addx		%g3, %g3, %g3		! IEU1	Group
    203 	addcc		%g4, %g4, %g4		! IEU1	Group
    204 	addx		%g3, %g3, %g3		! IEU1	Group
    205 	subcc		%g5, %g4, %g0		! IEU1	Group
    206 	ld		[%i0 + 0x00], %g4	! LSU	Group
    207 	addx		%g3, %g3, %g3		! IEU1	Group
    208 	addcc		%g4, %g4, %g4		! IEU1	Group
    209 	addx		%g3, %g3, %g3		! IEU1	Group
    210 	subcc		%g5, %g4, %g0		! IEU1	Group
    211 	addx		%g3, %g3, %g3		! IEU1	Group
    212 	ldub		[%g1 + %g3], %g3	! LSU	Group
    213 	cmp		%g3, 0			! IEU1	Group, stall
        	/* The mask byte is stored from the delay slot, so it is
        	 * written on both paths.
        	 */
    214 	be		2f			! CTI
    215 	 stb		%g3, [%i2]		! LSU
        	/* Clipped point: count it, OR the mask into the low byte of
        	 * %g2 and AND it into the high byte (the 0xff keeps the low
        	 * byte intact).
        	 */
    216 	sll		%g3, 8, %g4		! IEU1	Group
    217 	add		%l2, 1, %l2		! IEU0
    218 	or		%g4, 0xff, %g4		! IEU0	Group
    219 	or		%g2, %g3, %g2		! IEU1
    220 	and		%g2, %g4, %g2		! IEU0	Group
        	/* Advance the index and the mask pointer; the source pointer
        	 * advances by the stride in the delay slot.
        	 */
    221 2:	add		%l0, 1, %l0		! IEU0	Group
    222 	add		%i2, 1, %i2		! IEU0	Group
    223 	cmp		%l0, %l3		! IEU1	Group
    224 	bne		1b			! CTI
    225 	 add		%i0, %l1, %i0		! IEU0	Group
        	/* Write back the OR byte, then the AND byte.  The annulled
        	 * branch clears the AND byte only when fewer than count
        	 * points were clipped; both counters are unsigned, hence
        	 * blu.
        	 */
        3:
    226 	stb		%g2, [%i3]		! LSU
    227 	srl		%g2, 8, %g3		! IEU0	Group
    228 	cmp		%l2, %l3		! IEU1	Group
    229 	blu,a		1f			! CTI
    230 	 clr		%g3			! IEU0
    231 1:	stb		%g3, [%i4]		! LSU	Group
    232 	ret					! CTI	Group
    233 	 restore	%i1, 0x0, %o0
    234