Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  *
      5  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
     21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     23  * OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 /*
     27  * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
     28  * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
     29  * in there will break the build on some platforms.
     30  */
     31 
     32 #include "assyntax.h"
     33 #include "matypes.h"
     34 #include "xform_args.h"
     35 
     36 	SEG_TEXT
     37 
        /*
         * Raw 32-bit patterns of IEEE-754 single-precision constants:
         * 1065353216 == 0x3F800000 == 1.0f, and 0 == +0.0f.
         * Neither macro is referenced by the code visible in this file.
         */
     38 #define FP_ONE		1065353216
     39 #define FP_ZERO		0
     40 
        /*
         * Operand shorthands used by the loops below.  Each one is simply
         * REGOFF(4*n, reg), i.e. the n-th 32-bit word relative to a register:
         *   SRCn  - relative to ESI (current source point in the loops)
         *   DSTn  - relative to EDI (current destination point in the loops)
         *   MATn  - relative to EDX (matrix base in the FPU loops), n = 0..15
         * They are only meaningful while those registers hold those pointers.
         */
     41 #define SRC0		REGOFF(0, ESI)
     42 #define SRC1		REGOFF(4, ESI)
     43 #define SRC2		REGOFF(8, ESI)
     44 #define SRC3		REGOFF(12, ESI)
     45 #define DST0		REGOFF(0, EDI)
     46 #define DST1		REGOFF(4, EDI)
     47 #define DST2		REGOFF(8, EDI)
     48 #define DST3		REGOFF(12, EDI)
     49 #define MAT0		REGOFF(0, EDX)
     50 #define MAT1		REGOFF(4, EDX)
     51 #define MAT2		REGOFF(8, EDX)
     52 #define MAT3		REGOFF(12, EDX)
     53 #define MAT4		REGOFF(16, EDX)
     54 #define MAT5		REGOFF(20, EDX)
     55 #define MAT6		REGOFF(24, EDX)
     56 #define MAT7		REGOFF(28, EDX)
     57 #define MAT8		REGOFF(32, EDX)
     58 #define MAT9		REGOFF(36, EDX)
     59 #define MAT10		REGOFF(40, EDX)
     60 #define MAT11		REGOFF(44, EDX)
     61 #define MAT12		REGOFF(48, EDX)
     62 #define MAT13		REGOFF(52, EDX)
     63 #define MAT14		REGOFF(56, EDX)
     64 #define MAT15		REGOFF(60, EDX)
     65 
     66 
     67 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_general
         *
         * x87 transform of 4-component float points by all sixteen matrix
         * elements.  With s0..s3 the four floats of the current source point and
         * m0..m15 the matrix floats in memory order (MATn = dword at EDX + 4*n):
         *
         *   dst0 = s0*m0 + s1*m4 + s2*m8  + s3*m12
         *   dst1 = s0*m1 + s1*m5 + s2*m9  + s3*m13
         *   dst2 = s0*m2 + s1*m6 + s2*m10 + s3*m14
         *   dst3 = s0*m3 + s1*m7 + s2*m11 + s3*m15
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the sixteen matrix floats
         *       The ARG_* macros are defined outside this file; FRAME_OFFSET is
         *       set to the number of bytes pushed by the prologue, presumably so
         *       they can locate the stack arguments (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI and EDI are saved and restored; EAX, ECX, EDX and the flags
         *       are overwritten.  All eight x87 registers are occupied at the
         *       peak of the loop body (so the x87 stack is assumed to be empty
         *       on entry) and every value is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4..F7 are the running sums for dst0..dst3, F0..F3 the products being
         * folded into them.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file (blanks inside macros that
         * paste tokens can break the build on some platforms).
         */
     68 GLOBL GLNAME(_mesa_x86_transform_points4_general)
     69 HIDDEN(_mesa_x86_transform_points4_general)
     70 GLNAME(_mesa_x86_transform_points4_general):
     71 
        /* Two registers are pushed below: 8 bytes. */
     72 #define FRAME_OFFSET 8
     73 	PUSH_L( ESI )
     74 	PUSH_L( EDI )
     75 
     76 	MOV_L( ARG_SOURCE, ESI )
     77 	MOV_L( ARG_DEST, EDI )
     78 
     79 	MOV_L( ARG_MATRIX, EDX )
     80 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
     81 
        	/* The loop tests its exit condition at the bottom, so an empty */
        	/* source has to be filtered out here. */
     82 	TEST_L( ECX, ECX )
     83 	JZ( LLBL(x86_p4_gr_done) )
     84 
        	/* EAX = source stride; publish flags, count and size in the dest. */
     85 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
     86 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
     87 
     88 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
     89 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
     90 
        	/* ECX = count * 16; ESI/EDI switch from headers to point data; */
        	/* ECX then becomes the end-of-destination pointer. */
     91 	SHL_L( CONST(4), ECX )
     92 	MOV_L( REGOFF(V4F_START, ESI), ESI )
     93 
     94 	MOV_L( REGOFF(V4F_START, EDI), EDI )
     95 	ADD_L( EDI, ECX )
     96 
     97 ALIGNTEXT16
     98 LLBL(x86_p4_gr_loop):
     99 
        	/* s0 * m0..m3 seeds the four running sums. */
    100 	FLD_S( SRC0 )			/* F4 */
    101 	FMUL_S( MAT0 )
    102 	FLD_S( SRC0 )			/* F5 F4 */
    103 	FMUL_S( MAT1 )
    104 	FLD_S( SRC0 )			/* F6 F5 F4 */
    105 	FMUL_S( MAT2 )
    106 	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
    107 	FMUL_S( MAT3 )
    108 
        	/* s1 * m4..m7, then folded into the sums. */
    109 	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
    110 	FMUL_S( MAT4 )
    111 	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
    112 	FMUL_S( MAT5 )
    113 	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
    114 	FMUL_S( MAT6 )
    115 	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
    116 	FMUL_S( MAT7 )
    117 
        	/* The exchanges bring each product to ST0 so that the popping */
        	/* add lands on its matching sum: F4+=F0, F5+=F1, F6+=F2, F7+=F3. */
    118 	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
    119 	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
    120 	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
    121 	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
    122 	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
    123 	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
    124 
        	/* s2 * m8..m11, folded in the same way. */
    125 	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
    126 	FMUL_S( MAT8 )
    127 	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
    128 	FMUL_S( MAT9 )
    129 	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
    130 	FMUL_S( MAT10 )
    131 	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
    132 	FMUL_S( MAT11 )
    133 
    134 	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
    135 	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
    136 	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
    137 	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
    138 	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
    139 	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
    140 
        	/* s3 * m12..m15, folded in the same way. */
    141 	FLD_S( SRC3 )			/* F0 F7 F6 F5 F4 */
    142 	FMUL_S( MAT12 )
    143 	FLD_S( SRC3 )			/* F1 F0 F7 F6 F5 F4 */
    144 	FMUL_S( MAT13 )
    145 	FLD_S( SRC3 )			/* F2 F1 F0 F7 F6 F5 F4 */
    146 	FMUL_S( MAT14 )
    147 	FLD_S( SRC3 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
    148 	FMUL_S( MAT15 )
    149 
    150 	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
    151 	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
    152 	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
    153 	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
    154 	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
    155 	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
    156 
        	/* Store the sums in dst0..dst3 order; this empties the x87 stack. */
    157 	FXCH( ST(3) )			/* F4 F6 F5 F7 */
    158 	FSTP_S( DST0 )		/* F6 F5 F7 */
    159 	FXCH( ST(1) )			/* F5 F6 F7 */
    160 	FSTP_S( DST1 )		/* F6 F7 */
    161 	FSTP_S( DST2 )		/* F7 */
    162 	FSTP_S( DST3 )		/* */
    163 
        /* No jump in this file targets the _skip label; it is kept as is. */
    164 LLBL(x86_p4_gr_skip):
    165 
        	/* Destination advances by a fixed 16 bytes, source by its stride; */
        	/* stop when EDI reaches the end pointer computed above. */
    166 	ADD_L( CONST(16), EDI )
    167 	ADD_L( EAX, ESI )
    168 	CMP_L( ECX, EDI )
    169 	JNE( LLBL(x86_p4_gr_loop) )
    170 
    171 LLBL(x86_p4_gr_done):
    172 
    173 	POP_L( EDI )
    174 	POP_L( ESI )
    175 	RET
    176 #undef FRAME_OFFSET
    177 
    178 
    179 
    180 
    181 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_perspective
         *
         * x87 transform of 4-component float points that reads only matrix
         * elements m0, m5, m8, m9, m10 and m14 (MATn = dword at EDX + 4*n).
         * With s0..s3 the four floats of the current source point:
         *
         *   dst0 = s0*m0  + s2*m8
         *   dst1 = s1*m5  + s2*m9
         *   dst2 = s2*m10 + s3*m14
         *   dst3 = -s2        (sign bit of the raw dword flipped in EBX)
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the matrix floats
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the
         *       flags are overwritten.  Every x87 value pushed in the loop body
         *       is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4..F6 are the running sums for dst0..dst2, F0..F2 the products being
         * folded into them.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file.
         */
    182 GLOBL GLNAME(_mesa_x86_transform_points4_perspective)
    183 HIDDEN(_mesa_x86_transform_points4_perspective)
    184 GLNAME(_mesa_x86_transform_points4_perspective):
    185 
        /* Three registers are pushed below: 12 bytes. */
    186 #define FRAME_OFFSET 12
    187 	PUSH_L( ESI )
    188 	PUSH_L( EDI )
    189 	PUSH_L( EBX )
    190 
    191 	MOV_L( ARG_SOURCE, ESI )
    192 	MOV_L( ARG_DEST, EDI )
    193 
    194 	MOV_L( ARG_MATRIX, EDX )
    195 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    196 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    197 	TEST_L( ECX, ECX )
    198 	JZ( LLBL(x86_p4_pr_done) )
    199 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    200 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    201 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    202 
    203 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    204 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    205 
        	/* ECX = count * 16, then end-of-destination pointer. */
    206 	SHL_L( CONST(4), ECX )
    207 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    208 
    209 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    210 	ADD_L( EDI, ECX )
    211 
    212 ALIGNTEXT16
    213 LLBL(x86_p4_pr_loop):
    214 
    215 	FLD_S( SRC0 )			/* F4 */
    216 	FMUL_S( MAT0 )
    217 
    218 	FLD_S( SRC1 )			/* F5 F4 */
    219 	FMUL_S( MAT5 )
    220 
    221 	FLD_S( SRC2 )			/* F0 F5 F4 */
    222 	FMUL_S( MAT8 )
    223 	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
    224 	FMUL_S( MAT9 )
    225 	FLD_S( SRC2 )			/* F6 F1 F0 F5 F4 */
    226 	FMUL_S( MAT10 )
    227 
        	/* F4 += F0 (s2*m8), F5 += F1 (s2*m9). */
    228 	FXCH( ST(2) )			/* F0 F1 F6 F5 F4 */
    229 	FADDP( ST0, ST(4) )		/* F1 F6 F5 F4 */
    230 	FADDP( ST0, ST(2) )		/* F6 F5 F4 */
    231 
    232 	FLD_S( SRC3 )			/* F2 F6 F5 F4 */
    233 	FMUL_S( MAT14 )
    234 
        	/* F6 += F2 (s3*m14). */
    235 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    236 
        	/* dst3 is s2 with its sign bit (0x80000000) toggled, done in the */
        	/* integer unit; s2 is read before any destination store. */
    237 	MOV_L( SRC2, EBX )
    238 	XOR_L( CONST(-2147483648), EBX )/* change sign */
    239 
    240 	FXCH( ST(2) )			/* F4 F5 F6 */
    241 	FSTP_S( DST0 )		/* F5 F6 */
    242 	FSTP_S( DST1 )		/* F6 */
    243 	FSTP_S( DST2 )		/* */
    244 	MOV_L( EBX, DST3 )
    245 
        /* No jump in this file targets the _skip label; it is kept as is. */
    246 LLBL(x86_p4_pr_skip):
    247 
        	/* Destination advances by 16 bytes, source by its stride. */
    248 	ADD_L( CONST(16), EDI )
    249 	ADD_L( EAX, ESI )
    250 	CMP_L( ECX, EDI )
    251 	JNE( LLBL(x86_p4_pr_loop) )
    252 
    253 LLBL(x86_p4_pr_done):
    254 
    255 	POP_L( EBX )
    256 	POP_L( EDI )
    257 	POP_L( ESI )
    258 	RET
    259 #undef FRAME_OFFSET
    260 
    261 
    262 
    263 
    264 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_3d
         *
         * x87 transform of 4-component float points that reads matrix elements
         * m0-m2, m4-m6, m8-m10 and m12-m14 (MATn = dword at EDX + 4*n); m3, m7,
         * m11 and m15 are never read.  With s0..s3 the floats of the current
         * source point:
         *
         *   dst0 = s0*m0 + s1*m4 + s2*m8  + s3*m12
         *   dst1 = s0*m1 + s1*m5 + s2*m9  + s3*m13
         *   dst2 = s0*m2 + s1*m6 + s2*m10 + s3*m14
         *   dst3 = s3         (raw 32-bit copy through EBX)
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the matrix floats
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the
         *       flags are overwritten.  Every x87 value pushed in the loop body
         *       is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4..F6 are the running sums for dst0..dst2, F0..F2 the products being
         * folded into them.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file.
         */
    265 GLOBL GLNAME(_mesa_x86_transform_points4_3d)
    266 HIDDEN(_mesa_x86_transform_points4_3d)
    267 GLNAME(_mesa_x86_transform_points4_3d):
    268 
        /* Three registers are pushed below: 12 bytes. */
    269 #define FRAME_OFFSET 12
    270 	PUSH_L( ESI )
    271 	PUSH_L( EDI )
    272 	PUSH_L( EBX )
    273 
    274 	MOV_L( ARG_SOURCE, ESI )
    275 	MOV_L( ARG_DEST, EDI )
    276 
    277 	MOV_L( ARG_MATRIX, EDX )
    278 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    279 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    280 	TEST_L( ECX, ECX )
    281 	JZ( LLBL(x86_p4_3dr_done) )
    282 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    283 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    284 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    285 
    286 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    287 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    288 
        	/* ECX = count * 16, then end-of-destination pointer. */
    289 	SHL_L( CONST(4), ECX )
    290 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    291 
    292 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    293 	ADD_L( EDI, ECX )
    294 
    295 ALIGNTEXT16
    296 LLBL(x86_p4_3dr_loop):
    297 
        	/* s0 * m0..m2 seeds the three running sums. */
    298 	FLD_S( SRC0 )			/* F4 */
    299 	FMUL_S( MAT0 )
    300 	FLD_S( SRC0 )			/* F5 F4 */
    301 	FMUL_S( MAT1 )
    302 	FLD_S( SRC0 )			/* F6 F5 F4 */
    303 	FMUL_S( MAT2 )
    304 
        	/* s1 * m4..m6. */
    305 	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
    306 	FMUL_S( MAT4 )
    307 	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
    308 	FMUL_S( MAT5 )
    309 	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
    310 	FMUL_S( MAT6 )
    311 
        	/* F4 += F0, F5 += F1, F6 += F2. */
    312 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    313 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    314 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    315 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    316 
        	/* s2 * m8..m10, folded in the same way. */
    317 	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
    318 	FMUL_S( MAT8 )
    319 	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
    320 	FMUL_S( MAT9 )
    321 	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
    322 	FMUL_S( MAT10 )
    323 
    324 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    325 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    326 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    327 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    328 
        	/* s3 * m12..m14, folded in the same way. */
    329 	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
    330 	FMUL_S( MAT12 )
    331 	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
    332 	FMUL_S( MAT13 )
    333 	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
    334 	FMUL_S( MAT14 )
    335 
    336 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    337 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    338 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    339 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    340 
        	/* s3 passes through unchanged; it is read before any store. */
    341 	MOV_L( SRC3, EBX )
    342 
    343 	FXCH( ST(2) )			/* F4 F5 F6 */
    344 	FSTP_S( DST0 )		/* F5 F6 */
    345 	FSTP_S( DST1 )		/* F6 */
    346 	FSTP_S( DST2 )		/* */
    347 	MOV_L( EBX, DST3 )
    348 
        /* No jump in this file targets the _skip label; it is kept as is. */
    349 LLBL(x86_p4_3dr_skip):
    350 
        	/* Destination advances by 16 bytes, source by its stride. */
    351 	ADD_L( CONST(16), EDI )
    352 	ADD_L( EAX, ESI )
    353 	CMP_L( ECX, EDI )
    354 	JNE( LLBL(x86_p4_3dr_loop) )
    355 
    356 LLBL(x86_p4_3dr_done):
    357 
    358 	POP_L( EBX )
    359 	POP_L( EDI )
    360 	POP_L( ESI )
    361 	RET
    362 #undef FRAME_OFFSET
    363 
    364 
    365 
    366 
    367 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_3d_no_rot
         *
         * x87 transform of 4-component float points that reads only matrix
         * elements m0, m5, m10, m12, m13 and m14 (MATn = dword at EDX + 4*n).
         * With s0..s3 the four floats of the current source point:
         *
         *   dst0 = s0*m0  + s3*m12
         *   dst1 = s1*m5  + s3*m13
         *   dst2 = s2*m10 + s3*m14
         *   dst3 = s3         (raw 32-bit copy through EBX)
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the matrix floats
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the
         *       flags are overwritten.  Every x87 value pushed in the loop body
         *       is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4..F6 are the running sums for dst0..dst2, F0..F2 the products being
         * folded into them.
         */
    368 GLOBL GLNAME(_mesa_x86_transform_points4_3d_no_rot)
    369 HIDDEN(_mesa_x86_transform_points4_3d_no_rot)
    370 GLNAME(_mesa_x86_transform_points4_3d_no_rot):
    371 
        /* Three registers are pushed below: 12 bytes. */
    372 #define FRAME_OFFSET 12
    373 	PUSH_L( ESI )
    374 	PUSH_L( EDI )
    375 	PUSH_L( EBX )
    376 
    377 	MOV_L( ARG_SOURCE, ESI )
    378 	MOV_L( ARG_DEST, EDI )
    379 
    380 	MOV_L( ARG_MATRIX, EDX )
    381 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    382 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    383 	TEST_L( ECX, ECX )
    384 	JZ( LLBL(x86_p4_3dnrr_done) )
    385 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    386 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    387 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    388 
    389 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    390 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    391 
        	/* ECX = count * 16, then end-of-destination pointer. */
    392 	SHL_L( CONST(4), ECX )
    393 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    394 
    395 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    396 	ADD_L( EDI, ECX )
    397 
    398 ALIGNTEXT16
    399 LLBL(x86_p4_3dnrr_loop):
    400 
        	/* One product per output seeds the three sums. */
    401 	FLD_S( SRC0 )			/* F4 */
    402 	FMUL_S( MAT0 )
    403 
    404 	FLD_S( SRC1 )			/* F5 F4 */
    405 	FMUL_S( MAT5 )
    406 
    407 	FLD_S( SRC2 )			/* F6 F5 F4 */
    408 	FMUL_S( MAT10 )
    409 
        	/* s3 * m12..m14. */
    410 	FLD_S( SRC3 )			/* F0 F6 F5 F4 */
    411 	FMUL_S( MAT12 )
    412 	FLD_S( SRC3 )			/* F1 F0 F6 F5 F4 */
    413 	FMUL_S( MAT13 )
    414 	FLD_S( SRC3 )			/* F2 F1 F0 F6 F5 F4 */
    415 	FMUL_S( MAT14 )
    416 
        	/* F4 += F0, F5 += F1, F6 += F2. */
    417 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    418 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    419 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    420 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    421 
        	/* s3 passes through unchanged; it is read before any store. */
    422 	MOV_L( SRC3, EBX )
    423 
    424 	FXCH( ST(2) )			/* F4 F5 F6 */
    425 	FSTP_S( DST0   )		/* F5 F6 */
    426 	FSTP_S( DST1   )		/* F6 */
    427 	FSTP_S( DST2   )		/* */
    428 	MOV_L( EBX, DST3 )
    429 
        /* No jump in this file targets the _skip label. */
    430 LLBL(x86_p4_3dnrr_skip):
    431 
        	/* Destination advances by 16 bytes, source by its stride. */
    432 	ADD_L( CONST(16), EDI )
    433 	ADD_L( EAX, ESI )
    434 	CMP_L( ECX, EDI )
    435 	JNE( LLBL(x86_p4_3dnrr_loop) )
    436 
    437 LLBL(x86_p4_3dnrr_done):
    438 
    439 	POP_L( EBX )
    440 	POP_L( EDI )
    441 	POP_L( ESI )
    442 	RET
    443 #undef FRAME_OFFSET
    444 
    445 
    446 
    447 
    448 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_2d
         *
         * x87 transform of 4-component float points that reads only matrix
         * elements m0, m1, m4, m5, m12 and m13 (MATn = dword at EDX + 4*n).
         * With s0..s3 the four floats of the current source point:
         *
         *   dst0 = s0*m0 + s1*m4 + s3*m12
         *   dst1 = s0*m1 + s1*m5 + s3*m13
         *   dst2 = s2         (raw 32-bit copy through EBX)
         *   dst3 = s3         (raw 32-bit copy through EBP)
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the matrix floats
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI, EDI, EBX and EBP are saved and restored (EBP is used as a
         *       plain scratch register, not as a frame pointer); EAX, ECX, EDX
         *       and the flags are overwritten.  Every x87 value pushed in the
         *       loop body is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4/F5 are the running sums for dst0/dst1, F0/F1 the products being
         * folded into them.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file.
         */
    449 GLOBL GLNAME(_mesa_x86_transform_points4_2d)
    450 HIDDEN(_mesa_x86_transform_points4_2d)
    451 GLNAME(_mesa_x86_transform_points4_2d):
    452 
        /* Four registers are pushed below: 16 bytes. */
    453 #define FRAME_OFFSET 16
    454 	PUSH_L( ESI )
    455 	PUSH_L( EDI )
    456 	PUSH_L( EBX )
    457 	PUSH_L( EBP )
    458 
    459 	MOV_L( ARG_SOURCE, ESI )
    460 	MOV_L( ARG_DEST, EDI )
    461 
    462 	MOV_L( ARG_MATRIX, EDX )
    463 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    464 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    465 	TEST_L( ECX, ECX )
    466 	JZ( LLBL(x86_p4_2dr_done) )
    467 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    468 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    469 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    470 
    471 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    472 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    473 
        	/* ECX = count * 16, then end-of-destination pointer. */
    474 	SHL_L( CONST(4), ECX )
    475 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    476 
    477 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    478 	ADD_L( EDI, ECX )
    479 
    480 ALIGNTEXT16
    481 LLBL(x86_p4_2dr_loop):
    482 
        	/* s0 * m0, s0 * m1 seed the two running sums. */
    483 	FLD_S( SRC0 )			/* F4 */
    484 	FMUL_S( MAT0 )
    485 	FLD_S( SRC0 )			/* F5 F4 */
    486 	FMUL_S( MAT1 )
    487 
    488 	FLD_S( SRC1 )			/* F0 F5 F4 */
    489 	FMUL_S( MAT4 )
    490 	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
    491 	FMUL_S( MAT5 )
    492 
        	/* F4 += F0, F5 += F1. */
    493 	FXCH( ST(1) )			/* F0 F1 F5 F4 */
    494 	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
    495 	FADDP( ST0, ST(1) )		/* F5 F4 */
    496 
    497 	FLD_S( SRC3 )			/* F0 F5 F4 */
    498 	FMUL_S( MAT12 )
    499 	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
    500 	FMUL_S( MAT13 )
    501 
    502 	FXCH( ST(1) )			/* F0 F1 F5 F4 */
    503 	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
    504 	FADDP( ST0, ST(1) )		/* F5 F4 */
    505 
        	/* s2 and s3 pass through unchanged; both are read before any store. */
    506 	MOV_L( SRC2, EBX )
    507 	MOV_L( SRC3, EBP )
    508 
    509 	FXCH( ST(1) )			/* F4 F5 */
    510 	FSTP_S( DST0 )		/* F5 */
    511 	FSTP_S( DST1 )		/* */
    512 	MOV_L( EBX, DST2 )
    513 	MOV_L( EBP, DST3 )
    514 
        /* No jump in this file targets the _skip label; it is kept as is. */
    515 LLBL(x86_p4_2dr_skip):
    516 
        	/* Destination advances by 16 bytes, source by its stride. */
    517 	ADD_L( CONST(16), EDI )
    518 	ADD_L( EAX, ESI )
    519 	CMP_L( ECX, EDI )
    520 	JNE( LLBL(x86_p4_2dr_loop) )
    521 
    522 LLBL(x86_p4_2dr_done):
    523 
    524 	POP_L( EBP )
    525 	POP_L( EBX )
    526 	POP_L( EDI )
    527 	POP_L( ESI )
    528 	RET
    529 #undef FRAME_OFFSET
    530 
    531 
    532 
    533 
    534 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_2d_no_rot
         *
         * x87 transform of 4-component float points that reads only matrix
         * elements m0, m5, m12 and m13 (MATn = dword at EDX + 4*n).  With s0..s3
         * the four floats of the current source point:
         *
         *   dst0 = s0*m0 + s3*m12
         *   dst1 = s1*m5 + s3*m13
         *   dst2 = s2         (raw 32-bit copy through EBX)
         *   dst3 = s3         (raw 32-bit copy through EBP)
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       ARG_MATRIX             - base of the matrix floats
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags; destination data is written
         *       16 bytes per point.  When the source count is zero the function
         *       returns without writing anything, header included.
         * Regs: ESI, EDI, EBX and EBP are saved and restored (EBP is used as a
         *       plain scratch register, not as a frame pointer); EAX, ECX, EDX
         *       and the flags are overwritten.  Every x87 value pushed in the
         *       loop body is popped again before the next point.
         *
         * The stack comments list the x87 stack top-first (leftmost = ST0):
         * F4/F5 are the running sums for dst0/dst1, F0/F1 the products being
         * folded into them.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file.
         */
    535 GLOBL GLNAME(_mesa_x86_transform_points4_2d_no_rot)
    536 HIDDEN(_mesa_x86_transform_points4_2d_no_rot)
    537 GLNAME(_mesa_x86_transform_points4_2d_no_rot):
    538 
        /* Four registers are pushed below: 16 bytes. */
    539 #define FRAME_OFFSET 16
    540 	PUSH_L( ESI )
    541 	PUSH_L( EDI )
    542 	PUSH_L( EBX )
    543 	PUSH_L( EBP )
    544 
    545 	MOV_L( ARG_SOURCE, ESI )
    546 	MOV_L( ARG_DEST, EDI )
    547 
    548 	MOV_L( ARG_MATRIX, EDX )
    549 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    550 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    551 	TEST_L( ECX, ECX )
    552 	JZ( LLBL(x86_p4_2dnrr_done) )
    553 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    554 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    555 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    556 
    557 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    558 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    559 
        	/* ECX = count * 16, then end-of-destination pointer. */
    560 	SHL_L( CONST(4), ECX )
    561 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    562 
    563 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    564 	ADD_L( EDI, ECX )
    565 
    566 ALIGNTEXT16
    567 LLBL(x86_p4_2dnrr_loop):
    568 
        	/* One product per output seeds the two sums. */
    569 	FLD_S( SRC0 )			/* F4 */
    570 	FMUL_S( MAT0 )
    571 
    572 	FLD_S( SRC1 )			/* F5 F4 */
    573 	FMUL_S( MAT5 )
    574 
    575 	FLD_S( SRC3 )			/* F0 F5 F4 */
    576 	FMUL_S( MAT12 )
    577 	FLD_S( SRC3 )			/* F1 F0 F5 F4 */
    578 	FMUL_S( MAT13 )
    579 
        	/* F4 += F0, F5 += F1. */
    580 	FXCH( ST(1) )			/* F0 F1 F5 F4 */
    581 	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
    582 	FADDP( ST0, ST(1) )		/* F5 F4 */
    583 
        	/* s2 and s3 pass through unchanged; both are read before any store. */
    584 	MOV_L( SRC2, EBX )
    585 	MOV_L( SRC3, EBP )
    586 
    587 	FXCH( ST(1) )			/* F4 F5 */
    588 	FSTP_S( DST0   )		/* F5 */
    589 	FSTP_S( DST1   )		/* */
    590 	MOV_L( EBX, DST2 )
    591 	MOV_L( EBP, DST3 )
    592 
        /* No jump in this file targets the _skip label; it is kept as is. */
    593 LLBL(x86_p4_2dnrr_skip):
    594 
        	/* Destination advances by 16 bytes, source by its stride. */
    595 	ADD_L( CONST(16), EDI )
    596 	ADD_L( EAX, ESI )
    597 	CMP_L( ECX, EDI )
    598 	JNE( LLBL(x86_p4_2dnrr_loop) )
    599 
    600 LLBL(x86_p4_2dnrr_done):
    601 
    602 	POP_L( EBP )
    603 	POP_L( EBX )
    604 	POP_L( EDI )
    605 	POP_L( ESI )
    606 	RET
    607 #undef FRAME_OFFSET
    608 
    609 
    610 
    611 
    612 ALIGNTEXT16
        /*
         * _mesa_x86_transform_points4_identity
         *
         * Copies 4-component points from the source vector to the destination
         * vector as raw 32-bit words; neither the FPU nor the matrix is used.
         *
         * In:   ARG_SOURCE / ARG_DEST  - vector headers accessed through the
         *                                V4F_* offsets
         *       The matrix argument is never read: the previous load of
         *       ARG_MATRIX into EDX was dead, because EDX is overwritten as
         *       copy scratch before any use, so it has been dropped.
         *       FRAME_OFFSET is set to the number of bytes pushed by the
         *       prologue, presumably for the ARG_* macros (TODO confirm in
         *       xform_args.h).
         * Out:  the destination header gets count = source count, size = 4 and
         *       VEC_SIZE_4 OR-ed into its flags.  Point data is then copied
         *       16 bytes per point, unless the source and destination data
         *       pointers are equal, in which case the copy is skipped.  When
         *       the source count is zero the function returns without writing
         *       anything, header included.
         * Regs: ESI, EDI and EBX are saved and restored; EAX, ECX, EDX and the
         *       flags are overwritten.
         *
         * The symbol macros are written without blanks inside the parentheses,
         * following the NOTE at the top of this file.
         */
    613 GLOBL GLNAME(_mesa_x86_transform_points4_identity)
    614 HIDDEN(_mesa_x86_transform_points4_identity)
    615 GLNAME(_mesa_x86_transform_points4_identity):
    616 
        /* Three registers are pushed below: 12 bytes. */
    617 #define FRAME_OFFSET 12
    618 	PUSH_L( ESI )
    619 	PUSH_L( EDI )
    620 	PUSH_L( EBX )
    621 
    622 	MOV_L( ARG_SOURCE, ESI )
    623 	MOV_L( ARG_DEST, EDI )
    624 
    626 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )
    627 
        	/* Bottom-tested loop: an empty source is filtered out here. */
    628 	TEST_L( ECX, ECX )
    629 	JZ( LLBL(x86_p4_ir_done) )
    630 
        	/* EAX = source stride; publish flags, count and size in the dest. */
    631 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )
    632 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )
    633 
    634 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )
    635 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )
    636 
        	/* ECX = count * 16, then end-of-destination pointer. */
    637 	SHL_L( CONST(4), ECX )
    638 	MOV_L( REGOFF(V4F_START, ESI), ESI )
    639 
    640 	MOV_L( REGOFF(V4F_START, EDI), EDI )
    641 	ADD_L( EDI, ECX )
    642 
        	/* Same data pointer on both sides: nothing to copy. */
    643 	CMP_L( ESI, EDI )
    644 	JE( LLBL(x86_p4_ir_done) )
    645 
    646 ALIGNTEXT16
    647 LLBL(x86_p4_ir_loop):
    648 
        	/* Move the point two dwords at a time through EBX/EDX. */
    649 	MOV_L( SRC0, EBX )
    650 	MOV_L( SRC1, EDX )
    651 
    652 	MOV_L( EBX, DST0 )
    653 	MOV_L( EDX, DST1 )
    654 
    655 	MOV_L( SRC2, EBX )
    656 	MOV_L( SRC3, EDX )
    657 
    658 	MOV_L( EBX, DST2 )
    659 	MOV_L( EDX, DST3 )
    660 
        /* No jump in this file targets the _skip label; it is kept as is. */
    661 LLBL(x86_p4_ir_skip):
    662 
        	/* Destination advances by 16 bytes, source by its stride. */
    663 	ADD_L( CONST(16), EDI )
    664 	ADD_L( EAX, ESI )
    665 	CMP_L( ECX, EDI )
    666 	JNE( LLBL(x86_p4_ir_loop) )
    667 
    668 LLBL(x86_p4_ir_done):
    669 
    670 	POP_L( EBX )
    671 	POP_L( EDI )
    672 	POP_L( ESI )
    673 	RET
        /* Close the FRAME_OFFSET scope, as every other routine in this file does. */
        #undef FRAME_OFFSET
    674 
    675 #if defined (__ELF__) && defined (__linux__)
        /*
         * Emit an empty .note.GNU-stack section (no flags).  This is the usual
         * GNU-toolchain marker that an object does not need an executable stack;
         * it is only emitted for ELF targets on Linux.
         */
    676 	.section .note.GNU-stack,"",%progbits
    677 #endif
    678