Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  *
      5  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 /*
     27  * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
     28  * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
     29  * in there will break the build on some platforms.
     30  */
     31 
     32 #include "assyntax.h"
     33 #include "matypes.h"
     34 #include "clip_args.h"
     35 
     36 #define SRC0		REGOFF(0, ESI)	/* SRCn: 32-bit component n of the source vertex at ESI */
     37 #define SRC1		REGOFF(4, ESI)
     38 #define SRC2		REGOFF(8, ESI)
     39 #define SRC3		REGOFF(12, ESI)
     40 #define DST0		REGOFF(0, EDI)	/* DSTn: 32-bit component n of the destination vertex at EDI */
     41 #define DST1		REGOFF(4, EDI)
     42 #define DST2		REGOFF(8, EDI)
     43 #define DST3		REGOFF(12, EDI)
     44 #define MAT0		REGOFF(0, EDX)	/* MATn: 4-byte slots at EDX; NOTE(review): not referenced in the visible code */
     45 #define MAT1		REGOFF(4, EDX)
     46 #define MAT2		REGOFF(8, EDX)
     47 #define MAT3		REGOFF(12, EDX)
     48 
     49 
     50 /*
     51  * Table for clip test: maps a 7-bit index, built from the four source
     52  * components S(0)..S(3), to the clip mask byte stored for that vertex.
     53  * 	bit6 = sign bit of S(3) set
     54  * 	bit5 = sign bit of S(2) set
     55  * 	bit4 = abs(S(2)) > abs(S(3))
     56  * 	bit3 = sign bit of S(1) set
     57  * 	bit2 = abs(S(1)) > abs(S(3))
     58  * 	bit1 = sign bit of S(0) set
     59  * 	bit0 = abs(S(0)) > abs(S(3))
     60  */
     61 
     62 	SEG_DATA
     63 
     64 clip_table:	/* 128 one-byte entries: 16 rows of 8, one per 7-bit index */
     65 	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
     66 	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
     67 	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x24, 0x25, 0x24, 0x26
     68 	D_BYTE 0x20, 0x21, 0x20, 0x22, 0x28, 0x29, 0x28, 0x2a
     69 	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x04, 0x05, 0x04, 0x06
     70 	D_BYTE 0x00, 0x01, 0x00, 0x02, 0x08, 0x09, 0x08, 0x0a
     71 	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x14, 0x15, 0x14, 0x16
     72 	D_BYTE 0x10, 0x11, 0x10, 0x12, 0x18, 0x19, 0x18, 0x1a
     73 	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
     74 	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
     75 	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x27, 0x25, 0x27, 0x26
     76 	D_BYTE 0x2f, 0x2d, 0x2f, 0x2e, 0x2b, 0x29, 0x2b, 0x2a
     77 	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x37, 0x35, 0x37, 0x36
     78 	D_BYTE 0x3f, 0x3d, 0x3f, 0x3e, 0x3b, 0x39, 0x3b, 0x3a
     79 	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x17, 0x15, 0x17, 0x16
     80 	D_BYTE 0x1f, 0x1d, 0x1f, 0x1e, 0x1b, 0x19, 0x1b, 0x1a
     81 
     82 
     83 	SEG_TEXT
     84 
     85 /*
     86  * _mesa_x86_cliptest_points4
     87  *   For each source vertex: store its clip mask; if the mask is zero write S(0..2) scaled by 1/S(3) plus 1/S(3) to the destination, else write (0, 0, 0, 1.0f). Returns ARG_DEST in EAX.
     88  *   AL:  ormask  (OR of all clip masks, seeded from the byte at ARG_OR)
     89  *   AH:  andmask (AND of all clip masks, seeded from the byte at ARG_AND)
     90  *   EBX: temp0
     91  *   ECX: temp1 (table index, then the clip mask in CL)
     92  *   EDX: clipmask[]
     93  *   ESI: clip[]
     94  *   EDI: proj[]
     95  *   EBP: temp2
     96  */
     97 
     98 #if defined(__ELF__) && defined(__PIC__) && defined(GNU_ASSEMBLER) && !defined(ELFPIC)
     99 #define ELFPIC
    100 #endif
    101 
    102 ALIGNTEXT16
    103 GLOBL GLNAME( _mesa_x86_cliptest_points4 )
    104 HIDDEN(_mesa_x86_cliptest_points4)
    105 GLNAME( _mesa_x86_cliptest_points4 ):
    106 
    107 #ifdef ELFPIC
    108 #define FRAME_OFFSET 20	/* five 4-byte pushes below; presumably consumed by the ARG_* macros in clip_args.h - confirm */
    109 #else
    110 #define FRAME_OFFSET 16	/* four 4-byte pushes below */
    111 #endif
    112 	PUSH_L( ESI )
    113 	PUSH_L( EDI )
    114 	PUSH_L( EBP )
    115 	PUSH_L( EBX )
    116 
    117 #ifdef ELFPIC
    118 	/* store pointer to clip_table on stack */
    119 	CALL( LLBL(ctp4_get_eip) )
    120 	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
    121 	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
    122 	PUSH_L( EBX )		/* fifth push: the table pointer stays at the top of the stack */
    123 	JMP( LLBL(ctp4_clip_table_ready) )
    124 
    125 LLBL(ctp4_get_eip):
    126 	/* store eip in ebx */
    127 	MOV_L( REGIND(ESP), EBX )
    128 	RET
    129 
    130 LLBL(ctp4_clip_table_ready):
    131 #endif
    132 
    133 	MOV_L( ARG_SOURCE, ESI )
    134 	MOV_L( ARG_DEST, EDI )
    135 
    136 	MOV_L( ARG_CLIP, EDX )
    137 	MOV_L( ARG_OR, EBX )
    138 
    139 	MOV_L( ARG_AND, EBP )
    140 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    141 
    142 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    143 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* esi now points at the source data */
    144 
    145 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    146 	MOV_L( EAX, ARG_SOURCE )	/* put stride in ARG_SOURCE */
    147 
    148 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    149 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    150 
    151 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* edi now points at the dest data */
    152 	ADD_L( EDX, ECX )
    153 
    154 	MOV_L( ECX, ARG_CLIP )		/* put clipmask + count in ARG_CLIP */
    155 	CMP_L( ECX, EDX )		/* ZF set when count == 0 */
    156 
    157 	MOV_B( REGIND(EBX), AL )	/* MOV does not change flags, so the JZ below still sees the CMP */
    158 	MOV_B( REGIND(EBP), AH )
    159 
    160 	JZ( LLBL(ctp4_finish) )	/* count == 0: skip the loop and the mask write-back */
    161 
    162 ALIGNTEXT16
    163 LLBL(ctp4_top):
    164 
    165 	FLD1				/* F3 */
    166 	FDIV_S( SRC3 )		/* GH: don't care about div-by-zero */
    167 
    168 	MOV_L( SRC3, EBP )	/* integer view of the float bits of S(3) */
    169 	MOV_L( SRC2, EBX )
    170 
    171 	XOR_L( ECX, ECX )	/* ecx = 0: table index accumulator */
    172 	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */
    173 
    174 	ADC_L( ECX, ECX )	/* ecx = ecx*2 + carry: shifts the next index bit in */
    175 	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */
    176 
    177 	ADC_L( ECX, ECX )
    178 	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */
    179 
    180 	ADC_L( ECX, ECX )
    181 	MOV_L( SRC1, EBX )
    182 
    183 	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */
    184 
    185 	ADC_L( ECX, ECX )
    186 	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */
    187 
    188 	ADC_L( ECX, ECX )
    189 	MOV_L( SRC0, EBX )
    190 
    191 	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */
    192 
    193 	ADC_L( ECX, ECX )
    194 	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */
    195 
    196 	ADC_L( ECX, ECX )	/* ecx = complete 7-bit table index */
    197 
    198 #ifdef ELFPIC
    199 	MOV_L( REGIND(ESP), EBP )	/* clip_table */
    200 
    201 	MOV_B( REGBI(EBP, ECX), CL )
    202 #else
    203 	MOV_B( REGOFF(clip_table,ECX), CL )	/* cl = clip_table[ecx] */
    204 #endif
    205 
    206 	OR_B( CL, AL )
    207 	AND_B( CL, AH )
    208 
    209 	TEST_B( CL, CL )
    210 	MOV_B( CL, REGIND(EDX) )	/* store the clip mask; MOV keeps the TEST flags for the JZ */
    211 
    212 	JZ( LLBL(ctp4_proj) )	/* mask == 0: write the scaled vertex */
    213 
    214 LLBL(ctp4_noproj):
    215 
    216 	FSTP( ST(0) )			/* pop the unused F3: FPU stack empty */
    217 
    218 	MOV_L( CONST(0), DST0 )
    219 	MOV_L( CONST(0), DST1 )
    220 	MOV_L( CONST(0), DST2 )
    221 	MOV_L( CONST(0x3f800000), DST3 )	/* 0x3f800000 is 1.0f as IEEE-754 single bits */
    222 
    223 	JMP( LLBL(ctp4_next) )
    224 
    225 LLBL(ctp4_proj):
    226 
    227 	FLD_S( SRC0 )			/* F0 F3 */
    228 	FMUL2( ST(1), ST0 )	/* presumably st0 *= st(1), i.e. S(0) * (1/S(3)) - FMUL2 is defined in assyntax.h */
    229 
    230 	FLD_S( SRC1 )			/* F1 F0 F3 */
    231 	FMUL2( ST(2), ST0 )
    232 
    233 	FLD_S( SRC2 )			/* F2 F1 F0 F3 */
    234 	FMUL2( ST(3), ST0 )
    235 
    236 	FXCH( ST(2) )			/* F0 F1 F2 F3 */
    237 	FSTP_S( DST0 )		/* F1 F2 F3 */
    238 	FSTP_S( DST1 )		/* F2 F3 */
    239 	FSTP_S( DST2 )		/* F3 */
    240 	FSTP_S( DST3 )		/* */
    241 
    242 LLBL(ctp4_next):
    243 
    244 	INC_L( EDX )	/* next clipmask byte */
    245 	ADD_L( CONST(16), EDI )	/* next dest vertex: 4 components x 4 bytes */
    246 
    247 	ADD_L( ARG_SOURCE, ESI )	/* advance source by the stride saved in ARG_SOURCE */
    248 	CMP_L( EDX, ARG_CLIP )	/* reached clipmask + count? */
    249 
    250 	JNZ( LLBL(ctp4_top) )
    251 
    252 	MOV_L( ARG_OR, ECX )
    253 	MOV_L( ARG_AND, EDX )
    254 
    255 	MOV_B( AL, REGIND(ECX) )	/* write the accumulated masks back to the caller */
    256 	MOV_B( AH, REGIND(EDX) )
    257 
    258 LLBL(ctp4_finish):
    259 
    260 	MOV_L( ARG_DEST, EAX )	/* return value: the ARG_DEST pointer */
    261 #ifdef ELFPIC
    262 	POP_L( ESI )			/* discard ptr to clip_table */
    263 #endif
    264 	POP_L( EBX )
    265 	POP_L( EBP )
    266 	POP_L( EDI )
    267 	POP_L( ESI )
    268 
    269 	RET
    270 
    271 /*
    272  * _mesa_x86_cliptest_points4_np
    273  *   For each source vertex, stores its clip mask in clipmask[] and folds it
    274  *   into the or/and masks; no destination vertex data is written.
    275  *   AL: ormask, AH: andmask, EDX: clipmask[], ESI: clip[], EDI: clipmask + count. Returns ARG_SOURCE in EAX.
    276  */
    277 ALIGNTEXT16
    278 GLOBL GLNAME( _mesa_x86_cliptest_points4_np )
    279 HIDDEN(_mesa_x86_cliptest_points4_np)
    280 GLNAME( _mesa_x86_cliptest_points4_np ):
    281 
    282 #ifdef ELFPIC
    283 #define FRAME_OFFSET 20	/* same value as the earlier definition in this file; five 4-byte pushes below */
    284 #else
    285 #define FRAME_OFFSET 16	/* same value as the earlier definition in this file; four 4-byte pushes below */
    286 #endif
    287 	PUSH_L( ESI )
    288 	PUSH_L( EDI )
    289 	PUSH_L( EBP )
    290 	PUSH_L( EBX )
    291 
    292 #ifdef ELFPIC
    293 	/* store pointer to clip_table on stack */
    294 	CALL( LLBL(ctp4_np_get_eip) )
    295 	ADD_L( CONST(_GLOBAL_OFFSET_TABLE_), EBX )
    296 	MOV_L( REGOFF(clip_table@GOT, EBX), EBX )
    297 	PUSH_L( EBX )		/* fifth push: the table pointer stays at the top of the stack */
    298 	JMP( LLBL(ctp4_np_clip_table_ready) )
    299 
    300 LLBL(ctp4_np_get_eip):
    301 	/* store eip in ebx */
    302 	MOV_L( REGIND(ESP), EBX )
    303 	RET
    304 
    305 LLBL(ctp4_np_clip_table_ready):
    306 #endif
    307 
    308 	MOV_L( ARG_SOURCE, ESI )
    309 	/* slot */
    310 
    311 	MOV_L( ARG_CLIP, EDX )
    312 	MOV_L( ARG_OR, EBX )
    313 
    314 	MOV_L( ARG_AND, EBP )
    315 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    316 
    317 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    318 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* esi now points at the source data */
    319 
    320 	MOV_L( EAX, ARG_DEST )   	/* put stride in ARG_DEST */
    321 	ADD_L( EDX, ECX )
    322 
    323 	MOV_L( ECX, EDI )		/* put clipmask + count in EDI */
    324 	CMP_L( ECX, EDX )		/* ZF set when count == 0 */
    325 
    326 	MOV_B( REGIND(EBX), AL )	/* MOV does not change flags, so the JZ below still sees the CMP */
    327 	MOV_B( REGIND(EBP), AH )
    328 
    329 	JZ( LLBL(ctp4_np_finish) )	/* count == 0: skip the loop and the mask write-back */
    330 
    331 ALIGNTEXT16
    332 LLBL(ctp4_np_top):
    333 
    334 	MOV_L( SRC3, EBP )	/* integer view of the float bits of S(3) */
    335 	MOV_L( SRC2, EBX )
    336 
    337 	XOR_L( ECX, ECX )	/* ecx = 0: table index accumulator */
    338 	ADD_L( EBP, EBP )	/* ebp = abs(S(3))*2 ; carry = sign of S(3) */
    339 
    340 	ADC_L( ECX, ECX )	/* ecx = ecx*2 + carry: shifts the next index bit in */
    341 	ADD_L( EBX, EBX )	/* ebx = abs(S(2))*2 ; carry = sign of S(2) */
    342 
    343 	ADC_L( ECX, ECX )
    344 	CMP_L( EBX, EBP )	/* carry = abs(S(2))*2 > abs(S(3))*2 */
    345 
    346 	ADC_L( ECX, ECX )
    347 	MOV_L( SRC1, EBX )
    348 
    349 	ADD_L( EBX, EBX )	/* ebx = abs(S(1))*2 ; carry = sign of S(1) */
    350 
    351 	ADC_L( ECX, ECX )
    352 	CMP_L( EBX, EBP )	/* carry = abs(S(1))*2 > abs(S(3))*2 */
    353 
    354 	ADC_L( ECX, ECX )
    355 	MOV_L( SRC0, EBX )
    356 
    357 	ADD_L( EBX, EBX )	/* ebx = abs(S(0))*2 ; carry = sign of S(0) */
    358 
    359 	ADC_L( ECX, ECX )
    360 	CMP_L( EBX, EBP )	/* carry = abs(S(0))*2 > abs(S(3))*2 */
    361 
    362 	ADC_L( ECX, ECX )	/* ecx = complete 7-bit table index */
    363 
    364 #ifdef ELFPIC
    365 	MOV_L( REGIND(ESP), EBP )	/* clip_table */
    366 
    367 	MOV_B( REGBI(EBP, ECX), CL )
    368 #else
    369 	MOV_B( REGOFF(clip_table,ECX), CL )	/* cl = clip_table[ecx] */
    370 #endif
    371 
    372 	OR_B( CL, AL )
    373 	AND_B( CL, AH )
    374 
    375 	TEST_B( CL, CL )	/* NOTE(review): result unused here - INC_L/ADD_L/CMP_L overwrite the flags before the JNZ */
    376 	MOV_B( CL, REGIND(EDX) )	/* store the clip mask */
    377 
    378 	INC_L( EDX )	/* next clipmask byte */
    379 	/* slot */
    380 
    381 	ADD_L( ARG_DEST, ESI )	/* advance source by the stride saved in ARG_DEST */
    382 	CMP_L( EDX, EDI )	/* reached clipmask + count? */
    383 
    384 	JNZ( LLBL(ctp4_np_top) )
    385 
    386 	MOV_L( ARG_OR, ECX )
    387 	MOV_L( ARG_AND, EDX )
    388 
    389 	MOV_B( AL, REGIND(ECX) )	/* write the accumulated masks back to the caller */
    390 	MOV_B( AH, REGIND(EDX) )
    391 
    392 LLBL(ctp4_np_finish):
    393 
    394 	MOV_L( ARG_SOURCE, EAX )	/* return value: the ARG_SOURCE pointer (left untouched by this routine) */
    395 #ifdef ELFPIC
    396 	POP_L( ESI )			/* discard ptr to clip_table */
    397 #endif
    398 	POP_L( EBX )
    399 	POP_L( EBP )
    400 	POP_L( EDI )
    401 	POP_L( ESI )
    402 
    403 	RET
    404 
    405 #if defined (__ELF__) && defined (__linux__)
    406 	.section .note.GNU-stack,"",%progbits	/* empty marker section; by GNU toolchain convention it declares that this object needs no executable stack */
    407 #endif
    408