Home | History | Annotate | Download | only in x86
      1 
      2 /*
      3  * Mesa 3-D graphics library
      4  * Version:  3.5
      5  *
      6  * Copyright (C) 1999-2001  Brian Paul   All Rights Reserved.
      7  *
      8  * Permission is hereby granted, free of charge, to any person obtaining a
      9  * copy of this software and associated documentation files (the "Software"),
     10  * to deal in the Software without restriction, including without limitation
     11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     12  * and/or sell copies of the Software, and to permit persons to whom the
     13  * Software is furnished to do so, subject to the following conditions:
     14  *
     15  * The above copyright notice and this permission notice shall be included
     16  * in all copies or substantial portions of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     21  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     22  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     24  */
     25 
     26 /*
     27  * NOTE: Avoid using spaces in between '(' ')' and arguments, especially
     28  * with macros like CONST, LLBL that expand to CONCAT(...).  Putting spaces
     29  * in there will break the build on some platforms.
     30  */
     31 
     32 #include "assyntax.h"
     33 #include "matypes.h"
     34 #include "xform_args.h"
     35 
     36 	SEG_TEXT
     37 
        /*
         * Integer bit patterns of IEEE-754 single precision values:
         * 1065353216 is 0x3F800000 (1.0f) and 0 is +0.0f.
         * Neither constant is referenced by the code visible in this file.
         */
     38 #define FP_ONE		1065353216
     39 #define FP_ZERO		0
     40 
        /*
         * Operand shorthands used by every routine below.
         *   SRCn  32-bit word n of the source element addressed by ESI
         *   DSTn  32-bit word n of the destination element addressed by EDI
         *   MATn  32-bit word n of the matrix addressed by EDX
         * NOTE(review): REGOFF comes from assyntax.h and presumably builds a
         * displacement-plus-register memory operand - confirm there.
         */
     41 #define SRC0		REGOFF(0, ESI)
     42 #define SRC1		REGOFF(4, ESI)
     43 #define SRC2		REGOFF(8, ESI)
     44 #define SRC3		REGOFF(12, ESI)
     45 #define DST0		REGOFF(0, EDI)
     46 #define DST1		REGOFF(4, EDI)
     47 #define DST2		REGOFF(8, EDI)
     48 #define DST3		REGOFF(12, EDI)
     49 #define MAT0		REGOFF(0, EDX)
     50 #define MAT1		REGOFF(4, EDX)
     51 #define MAT2		REGOFF(8, EDX)
     52 #define MAT3		REGOFF(12, EDX)
     53 #define MAT4		REGOFF(16, EDX)
     54 #define MAT5		REGOFF(20, EDX)
     55 #define MAT6		REGOFF(24, EDX)
     56 #define MAT7		REGOFF(28, EDX)
     57 #define MAT8		REGOFF(32, EDX)
     58 #define MAT9		REGOFF(36, EDX)
     59 #define MAT10		REGOFF(40, EDX)
     60 #define MAT11		REGOFF(44, EDX)
     61 #define MAT12		REGOFF(48, EDX)
     62 #define MAT13		REGOFF(52, EDX)
     63 #define MAT14		REGOFF(56, EDX)
     64 #define MAT15		REGOFF(60, EDX)
     65 
     66 
     67 ALIGNTEXT16
     68 GLOBL GLNAME( _mesa_x86_transform_points3_general )
     69 HIDDEN(_mesa_x86_transform_points3_general)
     70 GLNAME( _mesa_x86_transform_points3_general ):
        /*
         * Transform 3-component points by all sixteen matrix elements and
         * write 4-component results.  With x, y, z read from SRC0..SRC2 and
         * m0..m15 read from MAT0..MAT15, each element produces:
         *
         *   DST0 = x*m0 + y*m4 + z*m8  + m12
         *   DST1 = x*m1 + y*m5 + z*m9  + m13
         *   DST2 = x*m2 + y*m6 + z*m10 + m14
         *   DST3 = x*m3 + y*m7 + z*m11 + m15
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *
         * The destination vector receives the source count, size 4 and the
         * VEC_SIZE_4 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * The trailing comments on the x87 instructions list the register
         * stack, top first.  F4..F7 are the accumulators that end up in
         * DST0..DST3, F0..F3 are the products folded into them.
         */
     71 
        /*
         * Two registers are pushed below (8 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
     72 #define FRAME_OFFSET 8
     73 	PUSH_L( ESI )			/* saved here, restored before RET */
     74 	PUSH_L( EDI )			/* saved here, restored before RET */
     75 
     76 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
     77 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
     78 
     79 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by MAT0..MAT15 */
     80 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
     81 
     82 	TEST_L( ECX, ECX )
     83 	JZ( LLBL(x86_p3_gr_done) )	/* empty source: leave without writing */
     84 
     85 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
     86 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_4 bits in dest flags */
     87 
     88 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
     89 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
     90 
     91 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
     92 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
     93 
     94 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
     95 	ADD_L( EDI, ECX )		/* ECX = dest end address */
     96 
     97 ALIGNTEXT16
     98 LLBL(x86_p3_gr_loop):
     99 
        	/* x (SRC0) times m0..m3 starts the four accumulators F4..F7 */
    100 	FLD_S( SRC0 )			/* F4 */
    101 	FMUL_S( MAT0 )
    102 	FLD_S( SRC0 )			/* F5 F4 */
    103 	FMUL_S( MAT1 )
    104 	FLD_S( SRC0 )			/* F6 F5 F4 */
    105 	FMUL_S( MAT2 )
    106 	FLD_S( SRC0 )			/* F7 F6 F5 F4 */
    107 	FMUL_S( MAT3 )
    108 
        	/* y (SRC1) times m4..m7 gives the temporaries F0..F3 */
    109 	FLD_S( SRC1 )			/* F0 F7 F6 F5 F4 */
    110 	FMUL_S( MAT4 )
    111 	FLD_S( SRC1 )			/* F1 F0 F7 F6 F5 F4 */
    112 	FMUL_S( MAT5 )
    113 	FLD_S( SRC1 )			/* F2 F1 F0 F7 F6 F5 F4 */
    114 	FMUL_S( MAT6 )
    115 	FLD_S( SRC1 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
    116 	FMUL_S( MAT7 )
    117 
        	/* fold F0..F3 into F4..F7, FXCH brings the next product to the top */
    118 	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
    119 	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
    120 	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
    121 	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
    122 	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
    123 	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
    124 
        	/* z (SRC2) times m8..m11, folded in the same way */
    125 	FLD_S( SRC2 )			/* F0 F7 F6 F5 F4 */
    126 	FMUL_S( MAT8 )
    127 	FLD_S( SRC2 )			/* F1 F0 F7 F6 F5 F4 */
    128 	FMUL_S( MAT9 )
    129 	FLD_S( SRC2 )			/* F2 F1 F0 F7 F6 F5 F4 */
    130 	FMUL_S( MAT10 )
    131 	FLD_S( SRC2 )			/* F3 F2 F1 F0 F7 F6 F5 F4 */
    132 	FMUL_S( MAT11 )
    133 
    134 	FXCH( ST(3) )			/* F0 F2 F1 F3 F7 F6 F5 F4 */
    135 	FADDP( ST0, ST(7) )		/* F2 F1 F3 F7 F6 F5 F4 */
    136 	FXCH( ST(1) )			/* F1 F2 F3 F7 F6 F5 F4 */
    137 	FADDP( ST0, ST(5) )		/* F2 F3 F7 F6 F5 F4 */
    138 	FADDP( ST0, ST(3) )		/* F3 F7 F6 F5 F4 */
    139 	FADDP( ST0, ST(1) )		/* F7 F6 F5 F4 */
    140 
        	/* add m12..m15, each FXCH puts the matching accumulator on top */
    141 	FXCH( ST(3) )			/* F4 F6 F5 F7 */
    142 	FADD_S( MAT12 )
    143 	FXCH( ST(2) )			/* F5 F6 F4 F7 */
    144 	FADD_S( MAT13 )
    145 	FXCH( ST(1) )			/* F6 F5 F4 F7 */
    146 	FADD_S( MAT14 )
    147 	FXCH( ST(3) )			/* F7 F5 F4 F6 */
    148 	FADD_S( MAT15 )
    149 
        	/* store to DST0..DST3, popping until the x87 stack is empty */
    150 	FXCH( ST(2) )			/* F4 F5 F7 F6 */
    151 	FSTP_S( DST0 )		/* F5 F7 F6 */
    152 	FSTP_S( DST1 )		/* F7 F6 */
    153 	FXCH( ST(1) )			/* F6 F7 */
    154 	FSTP_S( DST2 )		/* F7 */
    155 	FSTP_S( DST3 )		/* */
    156 
        /* no jump to the skip label is visible in this file */
    157 LLBL(x86_p3_gr_skip):
    158 
    159 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    160 	ADD_L( EAX, ESI )		/* next source element */
    161 	CMP_L( ECX, EDI )
    162 	JNE( LLBL(x86_p3_gr_loop) )	/* repeat until EDI reaches the end address */
    163 
    164 LLBL(x86_p3_gr_done):
    165 
    166 	POP_L( EDI )			/* restore in reverse push order */
    167 	POP_L( ESI )
    168 	RET
    169 #undef FRAME_OFFSET
    170 
    171 
    172 
    173 
    174 ALIGNTEXT16
    175 GLOBL GLNAME( _mesa_x86_transform_points3_perspective )
    176 HIDDEN(_mesa_x86_transform_points3_perspective)
    177 GLNAME( _mesa_x86_transform_points3_perspective ):
        /*
         * Transform 3-component points using only matrix elements
         * m0, m5, m8, m9, m10 and m14, writing 4-component results.
         * With x, y, z read from SRC0..SRC2, each element produces:
         *
         *   DST0 = x*m0 + z*m8
         *   DST1 = y*m5 + z*m9
         *   DST2 = z*m10 + m14
         *   DST3 = -z   (the bits of z with bit 31 inverted, done in EBX)
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *   EBX  integer scratch holding the negated z
         *
         * The destination vector receives the source count, size 4 and the
         * VEC_SIZE_4 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * Stack comments list the x87 register stack, top first.
         */
    178 
        /*
         * Three registers are pushed below (12 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
    179 #define FRAME_OFFSET 12
    180 	PUSH_L( ESI )			/* saved here, restored before RET */
    181 	PUSH_L( EDI )
    182 	PUSH_L( EBX )
    183 
    184 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    185 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    186 
    187 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by the MATn operands */
    188 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    189 
    190 	TEST_L( ECX, ECX )
    191 	JZ( LLBL(x86_p3_pr_done) )	/* empty source: leave without writing */
    192 
    193 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    194 	OR_L( CONST(VEC_SIZE_4), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_4 bits in dest flags */
    195 
    196 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    197 	MOV_L( CONST(4), REGOFF(V4F_SIZE, EDI) )	/* dest size = 4 */
    198 
    199 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    200 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    201 
    202 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    203 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    204 
    205 ALIGNTEXT16
    206 LLBL(x86_p3_pr_loop):
    207 
        	/* F4 = x*m0 and F5 = y*m5 start the DST0 and DST1 sums */
    208 	FLD_S( SRC0 )			/* F4 */
    209 	FMUL_S( MAT0 )
    210 
    211 	FLD_S( SRC1 )			/* F5 F4 */
    212 	FMUL_S( MAT5 )
    213 
        	/* F0, F1, F2 = z times m8, m9, m10 */
    214 	FLD_S( SRC2 )			/* F0 F5 F4 */
    215 	FMUL_S( MAT8 )
    216 	FLD_S( SRC2 )			/* F1 F0 F5 F4 */
    217 	FMUL_S( MAT9 )
    218 	FLD_S( SRC2 )			/* F2 F1 F0 F5 F4 */
    219 	FMUL_S( MAT10 )
    220 
        	/* F4 += F0, F5 += F1, then F6 = m14 + F2 */
    221 	FXCH( ST(2) )			/* F0 F1 F2 F5 F4 */
    222 	FADDP( ST0, ST(4) )		/* F1 F2 F5 F4 */
    223 	FADDP( ST0, ST(2) )		/* F2 F5 F4 */
    224 	FLD_S( MAT14 )		/* F6 F2 F5 F4 */
    225 	FXCH( ST(1) )			/* F2 F6 F5 F4 */
    226 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    227 
        	/* z is read as an integer before any store to the dest element */
    228 	MOV_L( SRC2, EBX )
    229 	XOR_L( CONST(-2147483648), EBX )/* invert bit 31, the float sign bit: EBX = -z */
    230 
    231 	FXCH( ST(2) )			/* F4 F5 F6 */
    232 	FSTP_S( DST0 )		/* F5 F6 */
    233 	FSTP_S( DST1 )		/* F6 */
    234 	FSTP_S( DST2 )		/* */
    235 	MOV_L( EBX, DST3 )		/* DST3 = -z */
    236 
        /* no jump to the skip label is visible in this file */
    237 LLBL(x86_p3_pr_skip):
    238 
    239 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    240 	ADD_L( EAX, ESI )		/* next source element */
    241 	CMP_L( ECX, EDI )
    242 	JNE( LLBL(x86_p3_pr_loop) )	/* repeat until EDI reaches the end address */
    243 
    244 LLBL(x86_p3_pr_done):
    245 
    246 	POP_L( EBX )			/* restore in reverse push order */
    247 	POP_L( EDI )
    248 	POP_L( ESI )
    249 	RET
    250 #undef FRAME_OFFSET
    251 
    252 
    253 
    254 
    255 ALIGNTEXT16
    256 GLOBL GLNAME( _mesa_x86_transform_points3_3d )
    257 HIDDEN(_mesa_x86_transform_points3_3d)
    258 GLNAME( _mesa_x86_transform_points3_3d ):
        /*
         * Transform 3-component points by the first three matrix rows of
         * each column plus the translation terms, writing 3-component
         * results.  With x, y, z read from SRC0..SRC2, each element produces:
         *
         *   DST0 = x*m0 + y*m4 + z*m8  + m12
         *   DST1 = x*m1 + y*m5 + z*m9  + m13
         *   DST2 = x*m2 + y*m6 + z*m10 + m14
         *
         * DST3 is not written.
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *
         * The destination vector receives the source count, size 3 and the
         * VEC_SIZE_3 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * Stack comments list the x87 register stack, top first.  F4..F6 are
         * the accumulators for DST0..DST2, F0..F2 the products folded in.
         */
    259 
        /*
         * Two registers are pushed below (8 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
    260 #define FRAME_OFFSET 8
    261 	PUSH_L( ESI )			/* saved here, restored before RET */
    262 	PUSH_L( EDI )
    263 
    264 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    265 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    266 
    267 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by the MATn operands */
    268 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    269 
    270 	TEST_L( ECX, ECX )
    271 	JZ( LLBL(x86_p3_3dr_done) )	/* empty source: leave without writing */
    272 
    273 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    274 	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_3 bits in dest flags */
    275 
    276 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    277 	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
    278 
    279 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    280 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    281 
    282 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    283 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    284 
    285 ALIGNTEXT16
    286 LLBL(x86_p3_3dr_loop):
    287 
        	/* x (SRC0) times m0..m2 starts the accumulators F4..F6 */
    288 	FLD_S( SRC0 )			/* F4 */
    289 	FMUL_S( MAT0 )
    290 	FLD_S( SRC0 )			/* F5 F4 */
    291 	FMUL_S( MAT1 )
    292 	FLD_S( SRC0 )			/* F6 F5 F4 */
    293 	FMUL_S( MAT2 )
    294 
        	/* y (SRC1) times m4..m6 gives the temporaries F0..F2 */
    295 	FLD_S( SRC1 )			/* F0 F6 F5 F4 */
    296 	FMUL_S( MAT4 )
    297 	FLD_S( SRC1 )			/* F1 F0 F6 F5 F4 */
    298 	FMUL_S( MAT5 )
    299 	FLD_S( SRC1 )			/* F2 F1 F0 F6 F5 F4 */
    300 	FMUL_S( MAT6 )
    301 
        	/* F4 += F0, F5 += F1, F6 += F2 */
    302 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    303 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    304 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    305 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    306 
        	/* z (SRC2) times m8..m10, folded in the same way */
    307 	FLD_S( SRC2 )			/* F0 F6 F5 F4 */
    308 	FMUL_S( MAT8 )
    309 	FLD_S( SRC2 )			/* F1 F0 F6 F5 F4 */
    310 	FMUL_S( MAT9 )
    311 	FLD_S( SRC2 )			/* F2 F1 F0 F6 F5 F4 */
    312 	FMUL_S( MAT10 )
    313 
    314 	FXCH( ST(2) )			/* F0 F1 F2 F6 F5 F4 */
    315 	FADDP( ST0, ST(5) )		/* F1 F2 F6 F5 F4 */
    316 	FADDP( ST0, ST(3) )		/* F2 F6 F5 F4 */
    317 	FADDP( ST0, ST(1) )		/* F6 F5 F4 */
    318 
        	/* add m12..m14, each FXCH puts the matching accumulator on top */
    319 	FXCH( ST(2) )			/* F4 F5 F6 */
    320 	FADD_S( MAT12 )
    321 	FXCH( ST(1) )			/* F5 F4 F6 */
    322 	FADD_S( MAT13 )
    323 	FXCH( ST(2) )			/* F6 F4 F5 */
    324 	FADD_S( MAT14 )
    325 
        	/* store to DST0..DST2, popping until the x87 stack is empty */
    326 	FXCH( ST(1) )			/* F4 F6 F5 */
    327 	FSTP_S( DST0   )		/* F6 F5 */
    328 	FXCH( ST(1) )			/* F5 F6 */
    329 	FSTP_S( DST1   )		/* F6 */
    330 	FSTP_S( DST2   )		/* */
    331 
        /* no jump to the skip label is visible in this file */
    332 LLBL(x86_p3_3dr_skip):
    333 
    334 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    335 	ADD_L( EAX, ESI )		/* next source element */
    336 	CMP_L( ECX, EDI )
    337 	JNE( LLBL(x86_p3_3dr_loop) )	/* repeat until EDI reaches the end address */
    338 
    339 LLBL(x86_p3_3dr_done):
    340 
    341 	POP_L( EDI )			/* restore in reverse push order */
    342 	POP_L( ESI )
    343 	RET
    344 #undef FRAME_OFFSET
    345 
    346 
    347 
    348 
    349 ALIGNTEXT16
    350 GLOBL GLNAME( _mesa_x86_transform_points3_3d_no_rot )
    351 HIDDEN(_mesa_x86_transform_points3_3d_no_rot)
    352 GLNAME( _mesa_x86_transform_points3_3d_no_rot ):
        /*
         * Scale and translate 3-component points using only matrix elements
         * m0, m5, m10 and m12..m14, writing 3-component results.
         * With x, y, z read from SRC0..SRC2, each element produces:
         *
         *   DST0 = x*m0  + m12
         *   DST1 = y*m5  + m13
         *   DST2 = z*m10 + m14
         *
         * DST3 is not written.
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *
         * The destination vector receives the source count, size 3 and the
         * VEC_SIZE_3 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * Stack comments list the x87 register stack, top first.  Here F4,
         * F1 and F2 are the three products, F5 and F6 start as the loaded
         * m13 and m14 and become the DST1 and DST2 results.
         */
    353 
        /*
         * Two registers are pushed below (8 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
    354 #define FRAME_OFFSET 8
    355 	PUSH_L( ESI )			/* saved here, restored before RET */
    356 	PUSH_L( EDI )
    357 
    358 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    359 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    360 
    361 
    362 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by the MATn operands */
    363 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    364 
    365 	TEST_L( ECX, ECX )
    366 	JZ( LLBL(x86_p3_3dnrr_done) )	/* empty source: leave without writing */
    367 
    368 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    369 	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_3 bits in dest flags */
    370 
    371 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    372 	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
    373 
    374 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    375 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    376 
    377 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    378 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    379 
    380 ALIGNTEXT16
    381 LLBL(x86_p3_3dnrr_loop):
    382 
        	/* the three products: F4 = x*m0, F1 = y*m5, F2 = z*m10 */
    383 	FLD_S( SRC0 )			/* F4 */
    384 	FMUL_S( MAT0 )
    385 
    386 	FLD_S( SRC1 )			/* F1 F4 */
    387 	FMUL_S( MAT5 )
    388 
    389 	FLD_S( SRC2 )			/* F2 F1 F4 */
    390 	FMUL_S( MAT10 )
    391 
        	/* F4 += m12, then m13 and m14 are loaded and receive F1 and F2 */
    392 	FXCH( ST(2) )			/* F4 F1 F2 */
    393 	FADD_S( MAT12 )
    394 	FLD_S( MAT13 )		/* F5 F4 F1 F2 */
    395 	FXCH( ST(2) )			/* F1 F4 F5 F2 */
    396 	FADDP( ST0, ST(2) )		/* F4 F5 F2 */
    397 	FLD_S( MAT14 )		/* F6 F4 F5 F2 */
    398 	FXCH( ST(3) )			/* F2 F4 F5 F6 */
    399 	FADDP( ST0, ST(3) )		/* F4 F5 F6 */
    400 
        	/* the stack is already in DST0..DST2 order */
    401 	FSTP_S( DST0   )		/* F5 F6 */
    402 	FSTP_S( DST1   )		/* F6 */
    403 	FSTP_S( DST2   )		/* */
    404 
        /* no jump to the skip label is visible in this file */
    405 LLBL(x86_p3_3dnrr_skip):
    406 
    407 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    408 	ADD_L( EAX, ESI )		/* next source element */
    409 	CMP_L( ECX, EDI )
    410 	JNE( LLBL(x86_p3_3dnrr_loop) )	/* repeat until EDI reaches the end address */
    411 
    412 LLBL(x86_p3_3dnrr_done):
    413 
    414 	POP_L( EDI )			/* restore in reverse push order */
    415 	POP_L( ESI )
    416 	RET
    417 #undef FRAME_OFFSET
    418 
    419 
    420 
    421 
    422 ALIGNTEXT16
    423 GLOBL GLNAME( _mesa_x86_transform_points3_2d )
    424 HIDDEN(_mesa_x86_transform_points3_2d)
    425 GLNAME( _mesa_x86_transform_points3_2d ):
        /*
         * Transform the x and y components of 3-component points using
         * matrix elements m0, m1, m4, m5, m12 and m13, and copy z through
         * unchanged.  With x, y, z read from SRC0..SRC2, each element
         * produces:
         *
         *   DST0 = x*m0 + y*m4 + m12
         *   DST1 = x*m1 + y*m5 + m13
         *   DST2 = z   (copied as a 32-bit word through EBX)
         *
         * DST3 is not written.
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *   EBX  integer scratch carrying z
         *
         * The destination vector receives the source count, size 3 and the
         * VEC_SIZE_3 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * Stack comments list the x87 register stack, top first.  F4 and F5
         * are the accumulators for DST0 and DST1, F0 and F1 the products
         * folded into them.
         */
    426 
        /*
         * Three registers are pushed below (12 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
    427 #define FRAME_OFFSET 12
    428 	PUSH_L( ESI )			/* saved here, restored before RET */
    429 	PUSH_L( EDI )
    430 	PUSH_L( EBX )
    431 
    432 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    433 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    434 
    435 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by the MATn operands */
    436 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    437 
    438 	TEST_L( ECX, ECX )
    439 	JZ( LLBL(x86_p3_2dr_done) )	/* empty source: leave without writing */
    440 
    441 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    442 	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_3 bits in dest flags */
    443 
    444 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    445 	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
    446 
    447 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    448 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    449 
    450 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    451 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    452 
    453 ALIGNTEXT16
    454 LLBL(x86_p3_2dr_loop):
    455 
        	/* F4 = x*m0 and F5 = x*m1 start the two accumulators */
    456 	FLD_S( SRC0 )			/* F4 */
    457 	FMUL_S( MAT0 )
    458 	FLD_S( SRC0 )			/* F5 F4 */
    459 	FMUL_S( MAT1 )
    460 
        	/* F0 = y*m4 and F1 = y*m5 */
    461 	FLD_S( SRC1 )			/* F0 F5 F4 */
    462 	FMUL_S( MAT4 )
    463 	FLD_S( SRC1 )			/* F1 F0 F5 F4 */
    464 	FMUL_S( MAT5 )
    465 
        	/* F4 += F0, F5 += F1 */
    466 	FXCH( ST(1) )			/* F0 F1 F5 F4 */
    467 	FADDP( ST0, ST(3) )		/* F1 F5 F4 */
    468 	FADDP( ST0, ST(1) )		/* F5 F4 */
    469 
        	/* add the translation terms m12 and m13 */
    470 	FXCH( ST(1) )			/* F4 F5 */
    471 	FADD_S( MAT12 )
    472 	FXCH( ST(1) )			/* F5 F4 */
    473 	FADD_S( MAT13 )
    474 
        	/* z is read as an integer before any store to the dest element */
    475 	MOV_L( SRC2, EBX )
    476 
    477 	FXCH( ST(1) )			/* F4 F5 */
    478 	FSTP_S( DST0   )		/* F5 */
    479 	FSTP_S( DST1   )		/* */
    480 	MOV_L( EBX, DST2 )		/* DST2 = z, unchanged */
    481 
        /* no jump to the skip label is visible in this file */
    482 LLBL(x86_p3_2dr_skip):
    483 
    484 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    485 	ADD_L( EAX, ESI )		/* next source element */
    486 	CMP_L( ECX, EDI )
    487 	JNE( LLBL(x86_p3_2dr_loop) )	/* repeat until EDI reaches the end address */
    488 
    489 LLBL(x86_p3_2dr_done):
    490 
    491 	POP_L( EBX )			/* restore in reverse push order */
    492 	POP_L( EDI )
    493 	POP_L( ESI )
    494 	RET
    495 #undef FRAME_OFFSET
    496 
    497 
    498 
    499 
    500 ALIGNTEXT16
    501 GLOBL GLNAME( _mesa_x86_transform_points3_2d_no_rot )
    502 HIDDEN(_mesa_x86_transform_points3_2d_no_rot)
    503 GLNAME( _mesa_x86_transform_points3_2d_no_rot ):
        /*
         * Scale and translate the x and y components of 3-component points
         * using matrix elements m0, m5, m12 and m13, and copy z through
         * unchanged.  With x, y, z read from SRC0..SRC2, each element
         * produces:
         *
         *   DST0 = x*m0 + m12
         *   DST1 = y*m5 + m13
         *   DST2 = z   (copied as a 32-bit word through EBX)
         *
         * DST3 is not written.
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   EDX  matrix base
         *   ECX  destination end address (start + count * 16)
         *   EBX  integer scratch carrying z
         *
         * The destination vector receives the source count, size 3 and the
         * VEC_SIZE_3 flag bits.  When the source count is zero the routine
         * returns without writing anything.
         *
         * Stack comments list the x87 register stack, top first.  F4 and F1
         * are the two products, F5 starts as the loaded m13 and becomes the
         * DST1 result.
         */
    504 
        /*
         * Three registers are pushed below (12 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         */
    505 #define FRAME_OFFSET 12
    506 	PUSH_L( ESI )			/* saved here, restored before RET */
    507 	PUSH_L( EDI )
    508 	PUSH_L( EBX )
    509 
    510 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    511 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    512 
    513 	MOV_L( ARG_MATRIX, EDX )	/* EDX = matrix base used by the MATn operands */
    514 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    515 
    516 	TEST_L( ECX, ECX )
    517 	JZ( LLBL(x86_p3_2dnrr_done) )	/* empty source: leave without writing */
    518 
    519 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    520 	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_3 bits in dest flags */
    521 
    522 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    523 	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
    524 
    525 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    526 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    527 
    528 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    529 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    530 
    531 ALIGNTEXT16
    532 LLBL(x86_p3_2dnrr_loop):
    533 
        	/* the two products: F4 = x*m0, F1 = y*m5 */
    534 	FLD_S( SRC0 )			/* F4 */
    535 	FMUL_S( MAT0 )
    536 
    537 	FLD_S( SRC1 )			/* F1 F4 */
    538 	FMUL_S( MAT5 )
    539 
        	/* F4 += m12, then m13 is loaded as F5 and receives F1 */
    540 	FXCH( ST(1) )			/* F4 F1 */
    541 	FADD_S( MAT12 )
    542 	FLD_S( MAT13 )		/* F5 F4 F1 */
    543 
    544 	FXCH( ST(2) )			/* F1 F4 F5 */
    545 	FADDP( ST0, ST(2) )		/* F4 F5 */
    546 
        	/* z is read as an integer before any store to the dest element */
    547 	MOV_L( SRC2, EBX )
    548 
    549 	FSTP_S( DST0 )		/* F5 */
    550 	FSTP_S( DST1 )		/* */
    551 	MOV_L( EBX, DST2 )		/* DST2 = z, unchanged */
    552 
        /* no jump to the skip label is visible in this file */
    553 LLBL(x86_p3_2dnrr_skip):
    554 
    555 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    556 	ADD_L( EAX, ESI )		/* next source element */
    557 	CMP_L( ECX, EDI )
    558 	JNE( LLBL(x86_p3_2dnrr_loop) )	/* repeat until EDI reaches the end address */
    559 
    560 LLBL(x86_p3_2dnrr_done):
    561 
    562 	POP_L( EBX )			/* restore in reverse push order */
    563 	POP_L( EDI )
    564 	POP_L( ESI )
    565 	RET
    566 #undef FRAME_OFFSET
    567 
    568 
    569 
    570 
    571 ALIGNTEXT16
    572 GLOBL GLNAME( _mesa_x86_transform_points3_identity )
    573 HIDDEN(_mesa_x86_transform_points3_identity)
    574 GLNAME(_mesa_x86_transform_points3_identity ):
        /*
         * Copy 3-component points from source to destination without using
         * any matrix element.  Each element copies the 32-bit words
         * SRC0..SRC2 to DST0..DST2 through integer registers.  DST3 is not
         * written.
         *
         * Register roles inside the loop:
         *   ESI  current source element, advanced by the source stride in EAX
         *   EDI  current destination element, advanced by 16 bytes
         *   ECX  destination end address (start + count * 16)
         *   EBX, EBP, EDX  scratch for the three copied words
         *
         * The destination vector receives the source count, size 3 and the
         * VEC_SIZE_3 flag bits.  When the source count is zero the routine
         * returns without writing anything.  When the source and destination
         * data start at the same address, the count, size and flags are
         * still updated but the copy loop is skipped.
         */
    575 
        /*
         * Four registers are pushed below (16 bytes).
         * NOTE(review): presumably the ARG_* macros from xform_args.h use
         * FRAME_OFFSET to skip the pushed registers - confirm there.
         * Unlike the other routines, no matching undef follows this one.
         */
    576 #define FRAME_OFFSET 16
    577 	PUSH_L( ESI )			/* saved here, restored before RET */
    578 	PUSH_L( EDI )
    579 	PUSH_L( EBX )
    580 	PUSH_L( EBP )
    581 
    582 	MOV_L( ARG_SOURCE, ESI )	/* ESI = source vector structure */
    583 	MOV_L( ARG_DEST, EDI )		/* EDI = destination vector structure */
    584 
    585 	MOV_L( ARG_MATRIX, EDX )	/* loaded but never read: EDX is overwritten in the loop */
    586 	MOV_L( REGOFF(V4F_COUNT, ESI), ECX )	/* ECX = source element count */
    587 
    588 	TEST_L( ECX, ECX )
    589 	JZ( LLBL(x86_p3_ir_done) )	/* empty source: leave without writing */
    590 
    591 	MOV_L( REGOFF(V4F_STRIDE, ESI), EAX )	/* EAX = source stride in bytes */
    592 	OR_L( CONST(VEC_SIZE_3), REGOFF(V4F_FLAGS, EDI) )	/* set VEC_SIZE_3 bits in dest flags */
    593 
    594 	MOV_L( ECX, REGOFF(V4F_COUNT, EDI) )	/* dest count = source count */
    595 	MOV_L( CONST(3), REGOFF(V4F_SIZE, EDI) )	/* dest size = 3 */
    596 
    597 	SHL_L( CONST(4), ECX )		/* ECX = count * 16 */
    598 	MOV_L( REGOFF(V4F_START, ESI), ESI )	/* ESI = first source element */
    599 
    600 	MOV_L( REGOFF(V4F_START, EDI), EDI )	/* EDI = first dest element */
    601 	ADD_L( EDI, ECX )		/* ECX = dest end address */
    602 
    603 	CMP_L( ESI, EDI )
    604 	JE( LLBL(x86_p3_ir_done) )	/* same start address: nothing to copy */
    605 
    606 ALIGNTEXT16
    607 LLBL(x86_p3_ir_loop):
    608 
        /* the integer copy path below is the one that gets assembled */
    609 #if 1
    610 	MOV_L( SRC0, EBX )		/* all three words are read ... */
    611 	MOV_L( SRC1, EBP )
    612 	MOV_L( SRC2, EDX )		/* EDX reused as scratch, the matrix is not needed */
    613 
    614 	MOV_L( EBX, DST0 )		/* ... before any of them is written */
    615 	MOV_L( EBP, DST1 )
    616 	MOV_L( EDX, DST2 )
        /* disabled alternative that moves the words with x87 loads and stores */
    617 #else
    618 	FLD_S( SRC0 )
    619 	FLD_S( SRC1 )
    620 	FLD_S( SRC2 )
    621 
    622 	FSTP_S( DST2 )
    623 	FSTP_S( DST1 )
    624 	FSTP_S( DST0 )
    625 #endif
    626 
        /* no jump to the skip label is visible in this file */
    627 LLBL(x86_p3_ir_skip):
    628 
    629 	ADD_L( CONST(16), EDI )	/* next dest element, fixed 16-byte pitch */
    630 	ADD_L( EAX, ESI )		/* next source element */
    631 	CMP_L( ECX, EDI )
    632 	JNE( LLBL(x86_p3_ir_loop) )	/* repeat until EDI reaches the end address */
    633 
    634 LLBL(x86_p3_ir_done):
    635 
    636 	POP_L( EBP )			/* restore in reverse push order */
    637 	POP_L( EBX )
    638 	POP_L( EDI )
    639 	POP_L( ESI )
    640 	RET
    641 
    642 #if defined (__ELF__) && defined (__linux__)
        /*
         * Emit an empty .note.GNU-stack section, with no flags, for ELF
         * objects built on Linux.  By GNU toolchain convention this marks
         * the object as not requiring an executable stack.
         */
    643 	.section .note.GNU-stack,"",%progbits
    644 #endif
    645