Home | History | Annotate | Download | only in client
      1 /*
      2 Copyright (C) 1996-1997 Id Software, Inc.
      3 
      4 This program is free software; you can redistribute it and/or
      5 modify it under the terms of the GNU General Public License
      6 as published by the Free Software Foundation; either version 2
      7 of the License, or (at your option) any later version.
      8 
      9 This program is distributed in the hope that it will be useful,
     10 but WITHOUT ANY WARRANTY; without even the implied warranty of
     11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     12 
     13 See the GNU General Public License for more details.
     14 
     15 You should have received a copy of the GNU General Public License
     16 along with this program; if not, write to the Free Software
     17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
     18 
     19 */
     20 //
     21 // d_polysa.s
     22 // x86 assembly-language polygon model drawing code
     23 //
     24 
     25 #include "asm_i386.h"
     26 #include "quakeasm.h"
     27 #include "asm_draw.h"
     28 #include "d_ifacea.h"
     29 
     30 #if	id386
     31 
      32 // !!! if this is changed, it must be changed in d_polyse.c too !!!
      33 #define DPS_MAXSPANS			MAXHEIGHT+1
      34 									// 1 extra for spanpackage that marks end
         // NOTE(review): the expansion above is not parenthesized, so DPS_MAXSPANS
         // only acts as a single value where the surrounding operators bind no
         // tighter than '+'.
      35 
      36 //#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
         // Hard-coded stand-in for the commented-out expression above.
         // NOTE(review): (1024+1+1+1)*32 appears to correspond to MAXHEIGHT == 1024,
         // spanpackage_t_size == 32 and the CACHE_SIZE term evaluating to 0 --
         // confirm against the headers whenever any of those change.  The
         // product is also left unparenthesized.
      37 #define SPAN_SIZE (1024+1+1+1)*32
     38 
     39 
      40 	.data
      41 
      42 	.align	4
         // Single-precision scratch cells: D_PolysetCalcGradients stores the two
         // edge deltas here (fstps) and re-reads them as memory operands (fmuls)
         // for each gradient pair it computes.
      43 p10_minus_p20:	.single		0
      44 p01_minus_p21:	.single		0
         // NOTE(review): no reference to temp0, temp1 or Ltemp is visible in the
         // part of the file reviewed here -- confirm whether they are still used.
      45 temp0:			.single		0
      46 temp1:			.single		0
      47 Ltemp:			.single		0
      48 
         // Jump table used by D_PolysetDrawSpans8 via
         // "jmp aff8entryvec_table(,%eax,4)" with %eax = (-count) & 7:
         // index 0 enters at LDraw8, index 1 at LDraw7, ... index 7 at LDraw1.
      49 aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
      50 				.long	LDraw4, LDraw3, LDraw2, LDraw1
      51 
         // r_zistepx rotated right by 16 bits (high word moved into the low word);
         // written at the top of D_PolysetDrawSpans8.
      52 lzistepx:		.long	0
     53 
     54 
     55 	.text
     56 
     57 #ifndef NeXT
     58 	.extern C(D_PolysetSetEdgeTable)
     59 	.extern C(D_RasterizeAliasPolySmooth)
     60 #endif
     61 
      62 //----------------------------------------------------------------------
      63 // affine triangle gradient calculation code
         //
         // D_PolysetCalcGradients (skinwidth)
         //
         // In:       one stack argument, read as skinwidth(%esp) = 4(%esp); it is
         //           the multiplier used when building a_ststepxwhole
         // Reads:    r_p0, r_p1, r_p2 (dwords at byte offsets 0, 4, 8, 12, 16, 20),
         //           d_xdenom, float_minus_1, ceil_cw, single_cw
         // Writes:   r_lstepx/y, r_sstepx/y, r_tstepx/y, r_zistepx/y,
         //           a_sstepxfrac, a_tstepxfrac, a_ststepxwhole, and the
         //           file-local scratch cells p10_minus_p20 / p01_minus_p21
         // Clobbers: %eax, %ecx, %edx, flags.  The x87 control word is left set
         //           to single_cw on return; the value it had on entry is not
         //           saved or restored here.
         //
         // The trailing comment on each FPU instruction lists the x87 register
         // stack after that instruction, st(0) first.  Every value pushed here
         // is popped again before the integer tail of the routine.
         // NOTE(review): the stack comments on fsubp/fsubrp describe the result
         // as GNU as encodes these AT&T mnemonics (operand sense reversed
         // relative to the Intel manual) -- confirm before porting this to
         // another assembler.
      64 //----------------------------------------------------------------------
      65 
      66 #define skinwidth	4+0
      67 
      68 .globl C(D_PolysetCalcGradients)
      69 C(D_PolysetCalcGradients):
      70 
      71 //	p00_minus_p20 = r_p0[0] - r_p2[0];
      72 //	p01_minus_p21 = r_p0[1] - r_p2[1];
      73 //	p10_minus_p20 = r_p1[0] - r_p2[0];
      74 //	p11_minus_p21 = r_p1[1] - r_p2[1];
      75 //
      76 //	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
      77 //			     p00_minus_p20 * p11_minus_p21);
      78 //
      79 //	ystepdenominv = -xstepdenominv;
      80 
      81 	fildl	C(r_p0)+0		// r_p0[0]
      82 	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
      83 	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
      84 	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
      85 	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
      86 	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
      87 							//  r_p2[0] | r_p0[0]
      88 	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
      89 							//  r_p2[0] | r_p0[0]
      90 	fsub	%st(2),%st(0)	// p01_minus_p21 | r_p1[0] | r_p2[1] | r_p1[1] |
      91 							//  r_p2[0] | r_p0[0]
      92 	fxch	%st(1)			// r_p1[0] | p01_minus_p21 | r_p2[1] | r_p1[1] |
      93 							//  r_p2[0] | r_p0[0]
      94 	fsub	%st(4),%st(0)	// p10_minus_p20 | p01_minus_p21 | r_p2[1] |
      95 							//  r_p1[1] | r_p2[0] | r_p0[0]
      96 	fxch	%st(5)			// r_p0[0] | p01_minus_p21 | r_p2[1] |
      97 							//  r_p1[1] | r_p2[0] | p10_minus_p20
      98 	fsubp	%st(0),%st(4)	// p01_minus_p21 | r_p2[1] | r_p1[1] |
      99 							//  p00_minus_p20 | p10_minus_p20
     100 	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01_minus_p21 |
     101 							//  p00_minus_p20 | p10_minus_p20
     102 	fsubp	%st(0),%st(1)	// p11_minus_p21 | p01_minus_p21 |
     103 							//  p00_minus_p20 | p10_minus_p20
     104 	fxch	%st(1)			// p01_minus_p21 | p11_minus_p21 |
     105 							//  p00_minus_p20 | p10_minus_p20
     106 	flds	C(d_xdenom)		// d_xdenom | p01_minus_p21 | p11_minus_p21 |
     107 							//  p00_minus_p20 | p10_minus_p20
     108 	fxch	%st(4)			// p10_minus_p20 | p01_minus_p21 | p11_minus_p21 |
     109 							//  p00_minus_p20 | d_xdenom
     110 	fstps	p10_minus_p20	// p01_minus_p21 | p11_minus_p21 |
     111 							//  p00_minus_p20 | d_xdenom
     112 	fstps	p01_minus_p21	// p11_minus_p21 | p00_minus_p20 | xstepdenominv
     113 	fxch	%st(2)			// xstepdenominv | p00_minus_p20 | p11_minus_p21
         // From here on the loaded d_xdenom value is what the stack comments call
         // xstepdenominv; the reciprocal shown in the C reference above is not
         // computed anywhere in this routine.
     114 
     115 //// ceil () for light so positive steps are exaggerated, negative steps
     116 //// diminished,  pushing us away from underflow toward overflow. Underflow is
     117 //// very visible, overflow is very unlikely, because of ambient lighting
     118 //	t0 = r_p0[4] - r_p2[4];
     119 //	t1 = r_p1[4] - r_p2[4];
     120 
     121 	fildl	C(r_p2)+16		// r_p2[4] | xstepdenominv | p00_minus_p20 |
     122 							//  p11_minus_p21
     123 	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xstepdenominv |
     124 							//  p00_minus_p20 | p11_minus_p21
     125 	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xstepdenominv |
     126 							//  p00_minus_p20 | p11_minus_p21
     127 	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xstepdenominv |
     128 							//  p00_minus_p20 | p11_minus_p21
     129 	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
     130 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     131 	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xstepdenominv |
     132 							//  p00_minus_p20 | p11_minus_p21
     133 	fsubrp	%st(0),%st(2)	// t0 | t1 | xstepdenominv | p00_minus_p20 |
     134 							//  p11_minus_p21
     135 
     136 //	r_lstepx = (int)
     137 //			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
     138 //	r_lstepy = (int)
     139 //			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);
     140 
     141 	fld		%st(0)			// t0 | t0 | t1 | xstepdenominv | p00_minus_p20 |
     142 							//  p11_minus_p21
     143 	fmul	%st(5),%st(0)	// t0*p11_minus_p21 | t0 | t1 | xstepdenominv |
     144 							//  p00_minus_p20 | p11_minus_p21
     145 	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | xstepdenominv |
     146 							//  p00_minus_p20 | p11_minus_p21
     147 	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
     148 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     149 	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
     150 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     151 	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
     152 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     153 	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
     154 							//  t0*p11_minus_p21 | xstepdenominv |
     155 							//  p00_minus_p20 | p11_minus_p21
     156 	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
     157 							//  t0*p11_minus_p21 | xstepdenominv |
     158 							//  p00_minus_p20 | p11_minus_p21
     159 	fmul	%st(5),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
     160 							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
     161 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     162 	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
     163 							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
     164 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     165 	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
     166 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     167 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     168 	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
     169 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     170 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     171 	fld		%st(2)			// xstepdenominv |
     172 							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
     173 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     174 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     175 	fmuls	float_minus_1	// ystepdenominv |
     176 							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
     177 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     178 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     179 	fxch	%st(2)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
     180 							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
     181 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     182 							//  p11_minus_p21
     183 	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
     184 							//   xstepdenominv |
     185 							//  t1*p00_minus_p20 - t0*p10_minus_p20 |
     186 							//  ystepdenominv | xstepdenominv |
     187 							//   p00_minus_p20 | p11_minus_p21
     188 	fxch	%st(1)			// t1*p00_minus_p20 - t0*p10_minus_p20 |
     189 							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
     190 							//   xstepdenominv | ystepdenominv |
     191 							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
     192 	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
     193 							//  ystepdenominv |
     194 							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
     195 							//  xstepdenominv | ystepdenominv |
     196 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     197 	fldcw	ceil_cw			// presumably selects round-up so the two fistpl below implement ceil() -- ceil_cw is defined elsewhere
     198 	fistpl	C(r_lstepy)		// r_lstepx | ystepdenominv | xstepdenominv |
     199 							//  p00_minus_p20 | p11_minus_p21
     200 	fistpl	C(r_lstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
     201 							//  p11_minus_p21
     202 	fldcw	single_cw		// control word in effect for all remaining fistpl stores and on return
     203 
     204 //	t0 = r_p0[2] - r_p2[2];
     205 //	t1 = r_p1[2] - r_p2[2];
     206 
     207 	fildl	C(r_p2)+8		// r_p2[2] | ystepdenominv | xstepdenominv |
     208 							//  p00_minus_p20 | p11_minus_p21
     209 	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | ystepdenominv |
     210 							//   xstepdenominv | p00_minus_p20 | p11_minus_p21
     211 	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | ystepdenominv |
     212 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     213 	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | ystepdenominv |
     214 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     215 	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
     216 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     217 							//  p11_minus_p21
     218 	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | ystepdenominv |
     219 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     220 	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
     221 							//  p00_minus_p20 | p11_minus_p21
     222 
     223 //	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
     224 //			xstepdenominv);
     225 //	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
     226 //			ystepdenominv);
     227 
     228 	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
         							//  p00_minus_p20 | p11_minus_p21
     229 	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
     230 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     231 	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
     232 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     233 	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
     234 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     235 							//  p11_minus_p21
     236 	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
     237 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     238 							//  p11_minus_p21
     239 	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
     240 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     241 							//  p11_minus_p21
     242 	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
     243 							//  t0*p11_minus_p21 | ystepdenominv |
     244 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     245 	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
     246 							//  t0*p11_minus_p21 | ystepdenominv |
     247 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     248 	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
     249 							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
     250 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     251 							//  p11_minus_p21
     252 	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
     253 							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
     254 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     255 							//  p11_minus_p21
     256 	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
     257 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     258 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     259 							//  p11_minus_p21
     260 	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
     261 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     262 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     263 							//  p11_minus_p21
     264 	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
     265 							//   ystepdenominv |
     266 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     267 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     268 							//  p11_minus_p21
     269 	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
     270 							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
     271 							//   ystepdenominv | ystepdenominv |
     272 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     273 	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
     274 							//  xstepdenominv |
     275 							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
     276 							//  ystepdenominv | ystepdenominv |
     277 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     278 	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
     279 							//  ystepdenominv |
     280 							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
     281 							//  xstepdenominv | ystepdenominv |
     282 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     283 	fistpl	C(r_sstepy)		// r_sstepx | ystepdenominv | xstepdenominv |
     284 							//  p00_minus_p20 | p11_minus_p21
     285 	fistpl	C(r_sstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
     286 							//  p11_minus_p21
     287 
     288 //	t0 = r_p0[3] - r_p2[3];
     289 //	t1 = r_p1[3] - r_p2[3];
     290 
     291 	fildl	C(r_p2)+12		// r_p2[3] | ystepdenominv | xstepdenominv |
     292 							//  p00_minus_p20 | p11_minus_p21
     293 	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | ystepdenominv |
     294 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     295 	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | ystepdenominv |
     296 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     297 	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | ystepdenominv |
     298 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     299 	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
     300 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     301 							//  p11_minus_p21
     302 	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | ystepdenominv |
     303 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     304 	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
     305 							//  p00_minus_p20 | p11_minus_p21
     306 
     307 //	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
     308 //			xstepdenominv);
     309 //	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
     310 //			ystepdenominv);
     311 
     312 	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
     313 							//  p00_minus_p20 | p11_minus_p21
     314 	fmul	%st(6),%st(0)	// t0*p11_minus_p21 | t0 | t1 | ystepdenominv |
     315 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     316 	fxch	%st(2)			// t1 | t0 | t0*p11_minus_p21 | ystepdenominv |
     317 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     318 	fld		%st(0)			// t1 | t1 | t0 | t0*p11_minus_p21 |
     319 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     320 							//  p11_minus_p21
     321 	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | t0*p11_minus_p21 |
     322 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     323 							//  p11_minus_p21
     324 	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | t0*p11_minus_p21 |
     325 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     326 							//  p11_minus_p21
     327 	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
     328 							//  t0*p11_minus_p21 | ystepdenominv |
     329 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     330 	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
     331 							//  t0*p11_minus_p21 | ystepdenominv |
     332 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     333 	fmul	%st(6),%st(0)	// t1*p00_minus_p20 | t0*p10_minus_p20 |
     334 							//  t1*p01_minus_p21 | t0*p11_minus_p21 |
     335 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     336 							//  p11_minus_p21
     337 	fxch	%st(2)			// t1*p01_minus_p21 | t0*p10_minus_p20 |
     338 							//  t1*p00_minus_p20 | t0*p11_minus_p21 |
     339 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     340 							//  p11_minus_p21
     341 	fsubp	%st(0),%st(3)	// t0*p10_minus_p20 | t1*p00_minus_p20 |
     342 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     343 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     344 							//  p11_minus_p21
     345 	fsubrp	%st(0),%st(1)	// t1*p00_minus_p20 - t0*p10_minus_p20 |
     346 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     347 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     348 							//  p11_minus_p21
     349 	fmul	%st(2),%st(0)	// (t1*p00_minus_p20 - t0*p10_minus_p20)*
     350 							//   ystepdenominv |
     351 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     352 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     353 							//  p11_minus_p21
     354 	fxch	%st(1)			// t1*p01_minus_p21 - t0*p11_minus_p21 |
     355 							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
     356 							//  ystepdenominv | ystepdenominv |
     357 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     358 	fmul	%st(3),%st(0)	// (t1*p01_minus_p21 - t0*p11_minus_p21)*
     359 							//  xstepdenominv |
     360 							//  (t1*p00_minus_p20 - t0*p10_minus_p20)*
     361 							//  ystepdenominv | ystepdenominv |
     362 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     363 	fxch	%st(1)			// (t1*p00_minus_p20 - t0*p10_minus_p20)*
     364 							//  ystepdenominv |
     365 							//  (t1*p01_minus_p21 - t0*p11_minus_p21)*
     366 							//  xstepdenominv | ystepdenominv |
     367 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     368 	fistpl	C(r_tstepy)		// r_tstepx | ystepdenominv | xstepdenominv |
     369 							//  p00_minus_p20 | p11_minus_p21
     370 	fistpl	C(r_tstepx)		// ystepdenominv | xstepdenominv | p00_minus_p20 |
     371 							//  p11_minus_p21
     372 
     373 //	t0 = r_p0[5] - r_p2[5];
     374 //	t1 = r_p1[5] - r_p2[5];
     375 
     376 	fildl	C(r_p2)+20		// r_p2[5] | ystepdenominv | xstepdenominv |
     377 							//  p00_minus_p20 | p11_minus_p21
     378 	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | ystepdenominv |
     379 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     380 	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | ystepdenominv |
     381 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     382 	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | ystepdenominv |
     383 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     384 	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
     385 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     386 							//  p11_minus_p21
     387 	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | ystepdenominv |
     388 							//  xstepdenominv | p00_minus_p20 | p11_minus_p21
     389 	fsubrp	%st(0),%st(2)	// t0 | t1 | ystepdenominv | xstepdenominv |
     390 							//  p00_minus_p20 | p11_minus_p21
     391 
     392 //	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
     393 //			xstepdenominv);
     394 //	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
     395 //			ystepdenominv);
         //
         // This is the last gradient pair, so unlike the blocks above the popping
         // forms (fmulp) overwrite p11_minus_p21, p00_minus_p20 and both
         // denominators in place and the x87 stack is fully drained by the end.
     396 
     397 	fld		%st(0)			// t0 | t0 | t1 | ystepdenominv | xstepdenominv |
     398 							//  p00_minus_p20 | p11_minus_p21
     399 	fmulp	%st(0),%st(6)	// t0 | t1 | ystepdenominv | xstepdenominv |
     400 							//  p00_minus_p20 | t0*p11_minus_p21
     401 	fxch	%st(1)			// t1 | t0 | ystepdenominv | xstepdenominv |
     402 							//  p00_minus_p20 | t0*p11_minus_p21
     403 	fld		%st(0)			// t1 | t1 | t0 | ystepdenominv | xstepdenominv |
     404 							//  p00_minus_p20 | t0*p11_minus_p21
     405 	fmuls	p01_minus_p21	// t1*p01_minus_p21 | t1 | t0 | ystepdenominv |
     406 							//  xstepdenominv | p00_minus_p20 |
     407 							//  t0*p11_minus_p21
     408 	fxch	%st(2)			// t0 | t1 | t1*p01_minus_p21 | ystepdenominv |
     409 							//  xstepdenominv | p00_minus_p20 |
     410 							//  t0*p11_minus_p21
     411 	fmuls	p10_minus_p20	// t0*p10_minus_p20 | t1 | t1*p01_minus_p21 |
     412 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     413 							//  t0*p11_minus_p21
     414 	fxch	%st(1)			// t1 | t0*p10_minus_p20 | t1*p01_minus_p21 |
     415 							//  ystepdenominv | xstepdenominv | p00_minus_p20 |
     416 							//  t0*p11_minus_p21
     417 	fmulp	%st(0),%st(5)	// t0*p10_minus_p20 | t1*p01_minus_p21 |
     418 							//  ystepdenominv | xstepdenominv |
     419 							//  t1*p00_minus_p20 | t0*p11_minus_p21
     420 	fxch	%st(5)			// t0*p11_minus_p21 | t1*p01_minus_p21 |
     421 							//  ystepdenominv | xstepdenominv |
     422 							//  t1*p00_minus_p20 | t0*p10_minus_p20
     423 	fsubrp	%st(0),%st(1)	// t1*p01_minus_p21 - t0*p11_minus_p21 |
     424 							//  ystepdenominv | xstepdenominv |
     425 							//  t1*p00_minus_p20 | t0*p10_minus_p20
     426 	fxch	%st(3)			// t1*p00_minus_p20 | ystepdenominv |
     427 							//  xstepdenominv |
     428 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     429 							//  t0*p10_minus_p20
     430 	fsubp	%st(0),%st(4)	// ystepdenominv | xstepdenominv |
     431 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     432 							//  t1*p00_minus_p20 - t0*p10_minus_p20
     433 	fxch	%st(1)			// xstepdenominv | ystepdenominv |
     434 							//  t1*p01_minus_p21 - t0*p11_minus_p21 |
     435 							//  t1*p00_minus_p20 - t0*p10_minus_p20
     436 	fmulp	%st(0),%st(2)	// ystepdenominv |
     437 							//  (t1*p01_minus_p21 - t0*p11_minus_p21) *
     438 							//  xstepdenominv |
     439 							//  t1*p00_minus_p20 - t0*p10_minus_p20
     440 	fmulp	%st(0),%st(2)	// (t1*p01_minus_p21 - t0*p11_minus_p21) *
     441 							//  xstepdenominv |
     442 							//  (t1*p00_minus_p20 - t0*p10_minus_p20) *
     443 							//  ystepdenominv
     444 	fistpl	C(r_zistepx)	// (t1*p00_minus_p20 - t0*p10_minus_p20) *
     445 							//  ystepdenominv
     446 	fistpl	C(r_zistepy)
     447 
     448 //	a_sstepxfrac = r_sstepx << 16;
     449 //	a_tstepxfrac = r_tstepx << 16;
     450 //
     451 //	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
     452 //			(r_sstepx >> 16);
         //
         // The assembly takes the skinwidth factor from the stack argument at
         // skinwidth(%esp) instead of reading r_affinetridesc as the C reference
         // above does.
     453 
     454 	movl	C(r_sstepx),%eax
     455 	movl	C(r_tstepx),%edx
     456 	shll	$16,%eax
     457 	shll	$16,%edx
     458 	movl	%eax,C(a_sstepxfrac)
     459 	movl	%edx,C(a_tstepxfrac)
     460 
     461 	movl	C(r_sstepx),%ecx
     462 	movl	C(r_tstepx),%eax
     463 	sarl	$16,%ecx
     464 	sarl	$16,%eax
     465 	imull	skinwidth(%esp)		// one-operand form: %edx:%eax = %eax * argument, so %edx is overwritten
     466 	addl	%ecx,%eax
     467 	movl	%eax,C(a_ststepxwhole)
     468 
     469 	ret
    470 
    471 
     472 //----------------------------------------------------------------------
     473 // recursive subdivision affine triangle drawing code
     474 //
     475 // not C-callable because of stdcall return
         //
         // D_PolysetRecursiveTriangle (lp1, lp2, lp3)
         //
         // In:       three pointers pushed by the caller; after the four register
         //           pushes in the prologue they are read as lp1/lp2/lp3(%esp).
         //           Each points at a vertex of dwords: [0] and [1] are compared
         //           and used to index zspantable/d_scantable, [2], [3] and [5]
         //           are averaged for the midpoint; [4] is not touched here.
         // Effect:   if every edge differs by at most 1 in both [0] and [1] the
         //           routine returns; otherwise one edge is split at its
         //           midpoint, the midpoint may be plotted (z-tested), and the
         //           routine calls itself on the two halves.
         // Writes:   at most one z-buffer word (through zspantable) and one byte
         //           at d_viewbuffer[d_scantable[..] + ..] per invocation.
         // Stack:    24 bytes of locals for the new vertex when splitting, plus
         //           four saved registers.
         // Return:   "ret $12" -- this routine removes its own 12 bytes of
         //           arguments, so callers must not pop them.
         // Registers: %ebx = lp1, %esi = lp2, %edi = lp3 throughout; %ebp, %esi,
         //           %edi and %ebx are saved/restored, %eax, %ecx, %edx are
         //           clobbered.
     476 //----------------------------------------------------------------------
     477 
     478 #define lp1	4+16
     479 #define lp2	8+16
     480 #define lp3	12+16
     481 
     482 .globl C(D_PolysetRecursiveTriangle)
     483 C(D_PolysetRecursiveTriangle):
     484 	pushl	%ebp				// preserve caller stack frame pointer
     485 	pushl	%esi				// preserve register variables
     486 	pushl	%edi
     487 	pushl	%ebx
     488 
     489 //	int		*temp;
     490 //	int		d;
     491 //	int		new[6];
     492 //	int		i;
     493 //	int		z;
     494 //	short	*zbuf;
     495 	movl	lp2(%esp),%esi
     496 	movl	lp1(%esp),%ebx
     497 	movl	lp3(%esp),%edi
     498 
     499 //	d = lp2[0] - lp1[0];
     500 //	if (d < -1 || d > 1)
     501 //		goto split;
     502 	movl	0(%esi),%eax
     503 
     504 	movl	0(%ebx),%edx
     505 	movl	4(%esi),%ebp
     506 
     507 	subl	%edx,%eax
     508 	movl	4(%ebx),%ecx
     509 
     510 	subl	%ecx,%ebp
     511 	incl	%eax
     512 
         // Range test used for every edge below: d+1 is compared unsigned against
         // 2, so "ja" is taken exactly when d < -1 or d > 1.
     513 	cmpl	$2,%eax
     514 	ja		LSplit
     515 
     516 //	d = lp2[1] - lp1[1];
     517 //	if (d < -1 || d > 1)
     518 //		goto split;
     519 	movl	0(%edi),%eax
     520 	incl	%ebp
     521 
     522 	cmpl	$2,%ebp
     523 	ja		LSplit
     524 
     525 //	d = lp3[0] - lp2[0];
     526 //	if (d < -1 || d > 1)
     527 //		goto split2;
     528 	movl	0(%esi),%edx
     529 	movl	4(%edi),%ebp
     530 
     531 	subl	%edx,%eax
     532 	movl	4(%esi),%ecx
     533 
     534 	subl	%ecx,%ebp
     535 	incl	%eax
     536 
     537 	cmpl	$2,%eax
     538 	ja		LSplit2
     539 
     540 //	d = lp3[1] - lp2[1];
     541 //	if (d < -1 || d > 1)
     542 //		goto split2;
     543 	movl	0(%ebx),%eax
     544 	incl	%ebp
     545 
     546 	cmpl	$2,%ebp
     547 	ja		LSplit2
     548 
     549 //	d = lp1[0] - lp3[0];
     550 //	if (d < -1 || d > 1)
     551 //		goto split3;
     552 	movl	0(%edi),%edx
     553 	movl	4(%ebx),%ebp
     554 
     555 	subl	%edx,%eax
     556 	movl	4(%edi),%ecx
     557 
     558 	subl	%ecx,%ebp
     559 	incl	%eax
     560 
     561 	incl	%ebp
     562 	movl	%ebx,%edx			// temp = lp1, consumed at LSplit3
     563 
     564 	cmpl	$2,%eax
     565 	ja		LSplit3
     566 
     567 //	d = lp1[1] - lp3[1];
     568 //	if (d < -1 || d > 1)
     569 //	{
     570 //split3:
     571 //		temp = lp1;
     572 //		lp1 = lp3;
     573 //		lp3 = lp2;
     574 //		lp2 = temp;
     575 //		goto split;
     576 //	}
     577 //
     578 //	return;			// entire tri is filled
     579 //
     580 	cmpl	$2,%ebp
     581 	jna		LDone
     582 
     583 LSplit3:
     584 	movl	%edi,%ebx
     585 	movl	%esi,%edi
     586 	movl	%edx,%esi
     587 	jmp		LSplit
     588 
     589 //split2:
     590 LSplit2:
     591 
     592 //	temp = lp1;
     593 //	lp1 = lp2;
     594 //	lp2 = lp3;
     595 //	lp3 = temp;
     596 	movl	%ebx,%eax
     597 	movl	%esi,%ebx
     598 	movl	%edi,%esi
     599 	movl	%eax,%edi
     600 
     601 //split:
     602 LSplit:
     603 
     604 	subl	$24,%esp		// allocate space for a new vertex
         // new[] now lives at 0..20(%esp); only new[0..3] and new[5] are written
         // below, new[4] (16(%esp)) is left uninitialized and is not read here.
     605 
     606 //// split this edge
     607 //	new[0] = (lp1[0] + lp2[0]) >> 1;
     608 //	new[1] = (lp1[1] + lp2[1]) >> 1;
     609 //	new[2] = (lp1[2] + lp2[2]) >> 1;
     610 //	new[3] = (lp1[3] + lp2[3]) >> 1;
     611 //	new[5] = (lp1[5] + lp2[5]) >> 1;
     612 	movl	8(%ebx),%eax
     613 
     614 	movl	8(%esi),%edx
     615 	movl	12(%ebx),%ecx
     616 
     617 	addl	%edx,%eax
     618 	movl	12(%esi),%edx
     619 
     620 	sarl	$1,%eax
     621 	addl	%edx,%ecx
     622 
     623 	movl	%eax,8(%esp)
     624 	movl	20(%ebx),%eax
     625 
     626 	sarl	$1,%ecx
     627 	movl	20(%esi),%edx
     628 
     629 	movl	%ecx,12(%esp)
     630 	addl	%edx,%eax
     631 
     632 	movl	0(%ebx),%ecx
     633 	movl	0(%esi),%edx
     634 
     635 	sarl	$1,%eax
     636 	addl	%ecx,%edx
     637 
     638 	movl	%eax,20(%esp)
     639 	movl	4(%ebx),%eax
     640 
     641 	sarl	$1,%edx
     642 	movl	4(%esi),%ebp
     643 
     644 	movl	%edx,0(%esp)
     645 	addl	%eax,%ebp
     646 
     647 	sarl	$1,%ebp
     648 	movl	%ebp,4(%esp)
     649 
     650 //// draw the point if splitting a leading edge
     651 //	if (lp2[1] > lp1[1])
     652 //		goto nodraw;
     653 	cmpl	%eax,4(%esi)		// %eax still holds lp1[1], %ecx still holds lp1[0]
     654 	jg		LNoDraw
     655 
     656 //	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
     657 //		goto nodraw;
     658 	movl	0(%esi),%edx
     659 	jnz		LDraw				// tests the flags of the cmpl above; the movl in between leaves them intact
     660 
     661 	cmpl	%ecx,%edx
     662 	jl		LNoDraw
     663 
     664 LDraw:
     665 
     666 // z = new[5] >> 16;
     667 	movl	20(%esp),%edx
     668 	movl	4(%esp),%ecx
     669 
     670 	sarl	$16,%edx
     671 	movl	0(%esp),%ebp
     672 
     673 //	zbuf = zspantable[new[1]] + new[0];
     674 	movl	C(zspantable)(,%ecx,4),%eax
     675 
     676 //	if (z >= *zbuf)
     677 //	{
     678 	cmpw	(%eax,%ebp,2),%dx
     679 	jnge	LNoDraw
     680 
     681 //		int		pix;
     682 //
     683 //		*zbuf = z;
     684 	movw	%dx,(%eax,%ebp,2)
     685 
     686 //		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
     687 	movl	12(%esp),%eax
     688 
     689 	sarl	$16,%eax
     690 	movl	8(%esp),%edx
     691 
     692 	sarl	$16,%edx
     693 	subl	%ecx,%ecx
     694 
     695 	movl	C(skintable)(,%eax,4),%eax
     696 	movl	4(%esp),%ebp
     697 
     698 	movb	(%eax,%edx,),%cl
     699 	movl	C(d_pcolormap),%edx
     700 
     701 	movb	(%edx,%ecx,),%dl
     702 	movl	0(%esp),%ecx
     703 
     704 //		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
     705 	movl	C(d_scantable)(,%ebp,4),%eax
     706 	addl	%eax,%ecx
     707 	movl	C(d_viewbuffer),%eax
     708 	movb	%dl,(%eax,%ecx,1)
     709 
     710 //	}
     711 //
     712 //nodraw:
     713 LNoDraw:
     714 
     715 //// recursively continue
     716 //	D_PolysetRecursiveTriangle (lp3, lp1, new);
     717 	pushl	%esp				// third argument = &new[0] (the value %esp had before this push)
     718 	pushl	%ebx
     719 	pushl	%edi
     720 	call	C(D_PolysetRecursiveTriangle)
     721 
     722 //	D_PolysetRecursiveTriangle (lp3, new, lp2);
     723 	movl	%esp,%ebx			// callee already popped its arguments, so %esp is &new[0] again
     724 	pushl	%esi
     725 	pushl	%ebx
     726 	pushl	%edi
     727 	call	C(D_PolysetRecursiveTriangle)
     728 	addl	$24,%esp
     729 
     730 LDone:
     731 	popl	%ebx				// restore register variables
     732 	popl	%edi
     733 	popl	%esi
     734 	popl	%ebp				// restore caller stack frame pointer
     735 	ret		$12					// return and discard the three pointer arguments
    736 
    737 
    738 //----------------------------------------------------------------------
    739 // 8-bpp horizontal span drawing code for affine polygons, with smooth
    740 // shading and no transparency
    741 //----------------------------------------------------------------------
    742 
    743 #define pspans	4+8
    744 
        // D_PolysetDrawSpans8
        //
        // Takes one stack argument: a pointer to an array of spanpackage_t
        // entries.  pspans = 4+8 is the argument's offset from %esp at the point
        // it is read, i.e. return address (4) plus the two pushes (%esi, %ebx)
        // that precede the read.
        //
        // For every entry until one whose count field is -999999:
        //   lcount = d_aspancount - entry->count, then the right edge is stepped
        //   (d_aspancount / errorterm are updated in memory);
        //   lcount <  1 : nothing is drawn;
        //   lcount == 1 : LExactlyOneLong draws a single z-tested pixel;
        //   lcount >  1 : an 8-times unrolled loop is entered through
        //                 aff8entryvec_table so that the first (partial) pass
        //                 draws lcount mod 8 pixels.
        //
        // The 0x12345678 displacements in front of LPatch1..LPatch9 are
        // placeholders; D_Aff8Patch stores its argument over them at run time.
        // D_PolysetAff8Start / D_PolysetAff8End bracket the code that contains
        // those patch sites.
        //
        // All four registers pushed on entry (%esi, %ebx, %ebp, %edi) are
        // restored before returning; %eax, %ecx and %edx are used as scratch.
    745 .globl C(D_PolysetAff8Start)
    746 C(D_PolysetAff8Start):
    747 
    748 .globl C(D_PolysetDrawSpans8)
    749 C(D_PolysetDrawSpans8):
    750 	pushl	%esi				// preserve register variables
    751 	pushl	%ebx
    752 
    753 	movl	pspans(%esp),%esi	// point to the first span descriptor
    754 	movl	C(r_zistepx),%ecx
    755 
    756 	pushl	%ebp				// preserve caller's stack frame
    757 	pushl	%edi
    758 
    759 	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
    760 	movl	spanpackage_t_count(%esi),%edx
    761 
    762 	movl	%ecx,lzistepx
    763 
    764 LSpanLoop:
    765 
    766 //		lcount = d_aspancount - pspanpackage->count;
    767 //
    768 //		errorterm += erroradjustup;
    769 //		if (errorterm >= 0)
    770 //		{
    771 //			d_aspancount += d_countextrastep;
    772 //			errorterm -= erroradjustdown;
    773 //		}
    774 //		else
    775 //		{
    776 //			d_aspancount += ubasestep;
    777 //		}
    778 	movl	C(d_aspancount),%eax
    779 	subl	%edx,%eax			// %eax = lcount (%edx holds this span's count)
    780 
    781 	movl	C(erroradjustup),%edx
    782 	movl	C(errorterm),%ebx
    783 	addl	%edx,%ebx
    784 	js		LNoTurnover			// errorterm still negative: base step only
    785 
    786 	movl	C(erroradjustdown),%edx
    787 	movl	C(d_countextrastep),%edi
    788 	subl	%edx,%ebx
    789 	movl	C(d_aspancount),%ebp
    790 	movl	%ebx,C(errorterm)
    791 	addl	%edi,%ebp
    792 	movl	%ebp,C(d_aspancount)
    793 	jmp		LRightEdgeStepped
    794 
    795 LNoTurnover:
    796 	movl	C(d_aspancount),%edi
    797 	movl	C(ubasestep),%edx
    798 	movl	%ebx,C(errorterm)
    799 	addl	%edx,%edi
    800 	movl	%edi,C(d_aspancount)
    801 
    802 LRightEdgeStepped:
    803 	cmpl	$1,%eax				// both branches below use this one compare
    804 
    805 	jl		LNextSpan			// lcount < 1: nothing to draw
    806 	jz		LExactlyOneLong		// lcount == 1
    807 
    808 //
    809 // set up advancetable
    810 //
    811 	movl	C(a_ststepxwhole),%ecx
    812 	movl	C(r_affinetridesc)+atd_skinwidth,%edx
    813 
    814 	movl	%ecx,advancetable+4	// advance base in t
    815 	addl	%edx,%ecx
    816 
    817 	movl	%ecx,advancetable	// advance extra in t
    818 	movl	C(a_tstepxfrac),%ecx
    819 
    820 	movw	C(r_lstepx),%cx
    821 	movl	%eax,%edx			// count
    822 
    823 	movl	%ecx,tstep
    824 	addl	$7,%edx
    825 
    826 	shrl	$3,%edx				// count of full and partial loops
    827 	movl	spanpackage_t_sfrac(%esi),%ebx
    828 
    829 	movw	%dx,%bx
    830 	movl	spanpackage_t_pz(%esi),%ecx
    831 
    832 	negl	%eax
    833 
    834 	movl	spanpackage_t_pdest(%esi),%edi
    835 	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1
    836 
    837 	subl	%eax,%edi	// compensate for hardwired offsets
    838 	subl	%eax,%ecx
    839 
    840 	subl	%eax,%ecx	// subtracted twice: the z offsets below step by 2 bytes per pixel
    841 	movl	spanpackage_t_tfrac(%esi),%edx
    842 
    843 	movw	spanpackage_t_light(%esi),%dx
    844 	movl	spanpackage_t_zi(%esi),%ebp
    845 
    846 	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
    847 	pushl	%esi		// span pointer is saved; %esi becomes ptex inside the loop
    848 
    849 	movl	spanpackage_t_ptex(%esi),%esi
    850 	jmp		aff8entryvec_table(,%eax,4)	// index 0 -> LDraw8, ..., index 7 -> LDraw1
    851 
    852 // %bx = count of full and partial loops
    853 // %ebx high word = sfrac
    854 // %ecx = pz
    855 // %dx = light
    856 // %edx high word = tfrac
    857 // %esi = ptex
    858 // %edi = pdest
    859 // %ebp = 1/z
    860 // tstep low word = C(r_lstepx)
    861 // tstep high word = C(a_tstepxfrac)
    862 // C(a_sstepxfrac) low word = 0
    863 // C(a_sstepxfrac) high word = C(a_sstepxfrac)
    864 
    865 LDrawLoop:
    866 
    867 // FIXME: do we need to clamp light? We may need at least a buffer bit to
    868 // keep it from poking into tfrac and causing problems
    869 
        // Each of the eight LDrawN bodies below is the same sequence with a
        // different hardwired z offset (0,2,...,14) and pixel offset (0..7).
        // Only the first one is annotated.
    870 LDraw8:
    871 	cmpw	(%ecx),%bp			// signed compare of 1/z against the z-buffer word
    872 	jl		Lp1					// behind what is already there: skip the pixel
    873 	xorl	%eax,%eax
    874 	movb	%dh,%ah				// high byte of light selects the table row
    875 	movb	(%esi),%al			// texel selects the column
    876 	movw	%bp,(%ecx)			// update the z-buffer
    877 	movb	0x12345678(%eax),%al	// placeholder displacement, patched by D_Aff8Patch
    878 LPatch8:
    879 	movb	%al,(%edi)
    880 Lp1:
    881 	addl	tstep,%edx			// step light (low word) and tfrac (high word) together
    882 	sbbl	%eax,%eax			// %eax = -1 if tfrac carried out, else 0
    883 	addl	lzistepx,%ebp
    884 	adcl	$0,%ebp				// carry out of the rotated 1/z is fed back into the low word
    885 	addl	C(a_sstepxfrac),%ebx
    886 	adcl	advancetable+4(,%eax,4),%esi	// base or extra t advance, plus the sfrac carry
    887 
    888 LDraw7:
    889 	cmpw	2(%ecx),%bp
    890 	jl		Lp2
    891 	xorl	%eax,%eax
    892 	movb	%dh,%ah
    893 	movb	(%esi),%al
    894 	movw	%bp,2(%ecx)
    895 	movb	0x12345678(%eax),%al
    896 LPatch7:
    897 	movb	%al,1(%edi)
    898 Lp2:
    899 	addl	tstep,%edx
    900 	sbbl	%eax,%eax
    901 	addl	lzistepx,%ebp
    902 	adcl	$0,%ebp
    903 	addl	C(a_sstepxfrac),%ebx
    904 	adcl	advancetable+4(,%eax,4),%esi
    905 
    906 LDraw6:
    907 	cmpw	4(%ecx),%bp
    908 	jl		Lp3
    909 	xorl	%eax,%eax
    910 	movb	%dh,%ah
    911 	movb	(%esi),%al
    912 	movw	%bp,4(%ecx)
    913 	movb	0x12345678(%eax),%al
    914 LPatch6:
    915 	movb	%al,2(%edi)
    916 Lp3:
    917 	addl	tstep,%edx
    918 	sbbl	%eax,%eax
    919 	addl	lzistepx,%ebp
    920 	adcl	$0,%ebp
    921 	addl	C(a_sstepxfrac),%ebx
    922 	adcl	advancetable+4(,%eax,4),%esi
    923 
    924 LDraw5:
    925 	cmpw	6(%ecx),%bp
    926 	jl		Lp4
    927 	xorl	%eax,%eax
    928 	movb	%dh,%ah
    929 	movb	(%esi),%al
    930 	movw	%bp,6(%ecx)
    931 	movb	0x12345678(%eax),%al
    932 LPatch5:
    933 	movb	%al,3(%edi)
    934 Lp4:
    935 	addl	tstep,%edx
    936 	sbbl	%eax,%eax
    937 	addl	lzistepx,%ebp
    938 	adcl	$0,%ebp
    939 	addl	C(a_sstepxfrac),%ebx
    940 	adcl	advancetable+4(,%eax,4),%esi
    941 
    942 LDraw4:
    943 	cmpw	8(%ecx),%bp
    944 	jl		Lp5
    945 	xorl	%eax,%eax
    946 	movb	%dh,%ah
    947 	movb	(%esi),%al
    948 	movw	%bp,8(%ecx)
    949 	movb	0x12345678(%eax),%al
    950 LPatch4:
    951 	movb	%al,4(%edi)
    952 Lp5:
    953 	addl	tstep,%edx
    954 	sbbl	%eax,%eax
    955 	addl	lzistepx,%ebp
    956 	adcl	$0,%ebp
    957 	addl	C(a_sstepxfrac),%ebx
    958 	adcl	advancetable+4(,%eax,4),%esi
    959 
    960 LDraw3:
    961 	cmpw	10(%ecx),%bp
    962 	jl		Lp6
    963 	xorl	%eax,%eax
    964 	movb	%dh,%ah
    965 	movb	(%esi),%al
    966 	movw	%bp,10(%ecx)
    967 	movb	0x12345678(%eax),%al
    968 LPatch3:
    969 	movb	%al,5(%edi)
    970 Lp6:
    971 	addl	tstep,%edx
    972 	sbbl	%eax,%eax
    973 	addl	lzistepx,%ebp
    974 	adcl	$0,%ebp
    975 	addl	C(a_sstepxfrac),%ebx
    976 	adcl	advancetable+4(,%eax,4),%esi
    977 
    978 LDraw2:
    979 	cmpw	12(%ecx),%bp
    980 	jl		Lp7
    981 	xorl	%eax,%eax
    982 	movb	%dh,%ah
    983 	movb	(%esi),%al
    984 	movw	%bp,12(%ecx)
    985 	movb	0x12345678(%eax),%al
    986 LPatch2:
    987 	movb	%al,6(%edi)
    988 Lp7:
    989 	addl	tstep,%edx
    990 	sbbl	%eax,%eax
    991 	addl	lzistepx,%ebp
    992 	adcl	$0,%ebp
    993 	addl	C(a_sstepxfrac),%ebx
    994 	adcl	advancetable+4(,%eax,4),%esi
    995 
    996 LDraw1:
    997 	cmpw	14(%ecx),%bp
    998 	jl		Lp8
    999 	xorl	%eax,%eax
   1000 	movb	%dh,%ah
   1001 	movb	(%esi),%al
   1002 	movw	%bp,14(%ecx)
   1003 	movb	0x12345678(%eax),%al
   1004 LPatch1:
   1005 	movb	%al,7(%edi)
   1006 Lp8:
   1007 	addl	tstep,%edx
   1008 	sbbl	%eax,%eax
   1009 	addl	lzistepx,%ebp
   1010 	adcl	$0,%ebp
   1011 	addl	C(a_sstepxfrac),%ebx
   1012 	adcl	advancetable+4(,%eax,4),%esi
   1013 
   1014 	addl	$8,%edi				// 8 pixels per pass
   1015 	addl	$16,%ecx			// 8 z-buffer words per pass
   1016 
   1017 	decw	%bx					// only the low word counts; sfrac stays in the high word
   1018 	jnz		LDrawLoop
   1019 
   1020 	popl	%esi				// restore spans pointer
   1021 LNextSpan:
   1022 	addl	$(spanpackage_t_size),%esi	// point to next span
   1023 LNextSpanESISet:
   1024 	movl	spanpackage_t_count(%esi),%edx
   1025 	cmpl	$-999999,%edx		// any more spans?
   1026 	jnz		LSpanLoop			// yes
   1027 
   1028 	popl	%edi
   1029 	popl	%ebp				// restore the caller's stack frame
   1030 	popl	%ebx				// restore register variables
   1031 	popl	%esi
   1032 	ret
   1033 
   1034 
   1035 // draw a one-long span
   1036 
   1037 LExactlyOneLong:
   1038 
   1039 	movl	spanpackage_t_pz(%esi),%ecx
   1040 	movl	spanpackage_t_zi(%esi),%ebp
   1041 
   1042 	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
   1043 	movl	spanpackage_t_ptex(%esi),%ebx
   1044 
   1045 	cmpw	(%ecx),%bp
   1046 	jl		LNextSpan			// hidden: LNextSpan advances %esi itself
   1047 	xorl	%eax,%eax
   1048 	movl	spanpackage_t_pdest(%esi),%edi
   1049 	movb	spanpackage_t_light+1(%esi),%ah	// second byte of the light field
   1050 	addl	$(spanpackage_t_size),%esi	// point to next span
   1051 	movb	(%ebx),%al
   1052 	movw	%bp,(%ecx)
   1053 	movb	0x12345678(%eax),%al	// placeholder displacement, patched by D_Aff8Patch
   1054 LPatch9:
   1055 	movb	%al,(%edi)
   1056 
   1057 	jmp		LNextSpanESISet		// %esi already advanced above
   1058 
   1059 .globl C(D_PolysetAff8End)
   1060 C(D_PolysetAff8End):
   1061 
   1062 
   1063 #define pcolormap		4
   1064 
        // D_Aff8Patch
        //
        // Takes one stack argument (pcolormap, at 4(%esp): no registers are
        // pushed, so only the return address sits below it).
        //
        // Stores that value into the nine patch sites of the span drawing code.
        // Each LPatchN label directly follows a `movb 0x12345678(%eax),%al`, so
        // LPatchN-4 is the 32-bit displacement of that instruction.
        //
        // The stores are listed in the order the sites appear in the code:
        // the unrolled loop (LPatch8 down to LPatch1) first, then the
        // one-pixel path (LPatch9).  Only %eax is modified.
   1065 .globl C(D_Aff8Patch)
   1066 C(D_Aff8Patch):
   1067 	movl	pcolormap(%esp),%eax	// value to patch in
   1068 	movl	%eax,LPatch8-4
   1069 	movl	%eax,LPatch7-4
   1070 	movl	%eax,LPatch6-4
   1071 	movl	%eax,LPatch5-4
   1072 	movl	%eax,LPatch4-4
   1073 	movl	%eax,LPatch3-4
   1074 	movl	%eax,LPatch2-4
   1075 	movl	%eax,LPatch1-4
   1076 	movl	%eax,LPatch9-4			// one-pixel span path
   1077 
   1078 	ret
   1079 
   1080 
   1081 //----------------------------------------------------------------------
   1082 // Alias model polygon dispatching code, combined with subdivided affine
   1083 // triangle drawing code
   1084 //----------------------------------------------------------------------
   1085 
   1086 .globl C(D_PolysetDraw)
   1087 C(D_PolysetDraw):
        // D_PolysetDraw
        //
        // Takes no stack arguments; everything is read from r_affinetridesc.
        //
        // Reserves SPAN_SIZE bytes of stack, publishes the CACHE_SIZE-aligned
        // start of that buffer in a_spans, then either
        //   - jumps to D_DrawNonSubdiv when r_affinetridesc.drawtype == 0
        //     (that routine releases the stack buffer and returns to our caller), or
        //   - runs the subdividing loop below, calling
        //     D_PolysetRecursiveTriangle for every triangle whose signed area
        //     test is negative.
        //
        // %ebp is a byte offset into the triangle array that counts DOWN from
        // numtriangles*mtri_size to 0, so triangles are visited last-to-first;
        // every ptri access is therefore biased by -mtri_size.
        //
        // Register roles inside the loop:
        //   %ebx = ptriangles   %edi = pfinalverts   %ebp = remaining byte offset
        //   %ecx / %esi / %edx = index0 / index1 / index2
   1088 
   1089 //	spanpackage_t	spans[DPS_MAXSPANS + 1 +
   1090 //			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
   1091 //						// one extra because of cache line pretouching
   1092 //
   1093 //	a_spans = (spanpackage_t *)
   1094 //			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
   1095 	subl	$(SPAN_SIZE),%esp
   1096 	movl	%esp,%eax
   1097 	addl	$(CACHE_SIZE - 1),%eax
   1098 	andl	$(~(CACHE_SIZE - 1)),%eax
   1099 	movl	%eax,C(a_spans)
   1100 
   1101 //	if (r_affinetridesc.drawtype)
   1102 //		D_DrawSubdiv ();
   1103 //	else
   1104 //		D_DrawNonSubdiv ();
   1105 	movl	C(r_affinetridesc)+atd_drawtype,%eax
   1106 	testl	%eax,%eax
   1107 	jz		C(D_DrawNonSubdiv)	// tail jump: the stack buffer is freed there
   1108 
   1109 	pushl	%ebp				// preserve caller stack frame pointer
   1110 
   1111 //	lnumtriangles = r_affinetridesc.numtriangles;
   1112 	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
   1113 
   1114 	pushl	%esi				// preserve register variables
   1115 	shll	$(mtri_shift),%ebp	// triangle count -> byte offset past the last triangle
   1116 
   1117 	pushl	%ebx
   1118 //	ptri = r_affinetridesc.ptriangles;
   1119 	movl	C(r_affinetridesc)+atd_ptriangles,%ebx
   1120 
   1121 	pushl	%edi
   1122 
   1123 //	mtriangle_t		*ptri;
   1124 //	finalvert_t		*pfv, *index0, *index1, *index2;
   1125 //	int				i;
   1126 //	int				lnumtriangles;
   1127 //	int				s0, s1, s2;
   1128 
   1129 //	pfv = r_affinetridesc.pfinalverts;
   1130 	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
   1131 
        // the C loop below runs zero times for lnumtriangles <= 0; without this
        // guard the count-down loop would start by reading ptri[-1]
	testl	%ebp,%ebp
	jle		Ldone2
        
   1132 //	for (i=0 ; i<lnumtriangles ; i++)
   1133 //	{
   1134 
   1135 Llooptop:
   1136 
   1137 //		index0 = pfv + ptri[i].vertindex[0];
   1138 //		index1 = pfv + ptri[i].vertindex[1];
   1139 //		index2 = pfv + ptri[i].vertindex[2];
   1140 	movl	mtri_vertindex-mtri_size+0(%ebx,%ebp,),%ecx
   1141 	movl	mtri_vertindex-mtri_size+4(%ebx,%ebp,),%esi
   1142 
   1143 	shll	$(fv_shift),%ecx
   1144 	movl	mtri_vertindex-mtri_size+8(%ebx,%ebp,),%edx
   1145 
   1146 	shll	$(fv_shift),%esi
   1147 	addl	%edi,%ecx
   1148 
   1149 	shll	$(fv_shift),%edx
   1150 	addl	%edi,%esi
   1151 
   1152 	addl	%edi,%edx
   1153 
   1154 //		if (((index0->v[1]-index1->v[1]) *
   1155 //				(index0->v[0]-index2->v[0]) -
   1156 //				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0)
   1157 //		{
   1158 //			continue;
   1159 //		}
   1160 //
   1161 //		d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00];
   1162 	fildl	fv_v+4(%ecx)	// i0v1
   1163 	fildl	fv_v+4(%esi)	// i1v1 | i0v1
   1164 	fildl	fv_v+0(%ecx)	// i0v0 | i1v1 | i0v1
   1165 	fildl	fv_v+0(%edx)	// i2v0 | i0v0 | i1v1 | i0v1
   1166 	fxch	%st(2)			// i1v1 | i0v0 | i2v0 | i0v1
   1167 	fsubr	%st(3),%st(0)	// i0v1-i1v1 | i0v0 | i2v0 | i0v1
   1168 	fildl	fv_v+0(%esi)	// i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1
   1169 	fxch	%st(2)			// i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1
   1170 	fsub	%st(0),%st(3)	// i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0 | i0v1
   1171 	fildl	fv_v+4(%edx)	// i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
   1172 	fxch	%st(1)			// i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1
   1173 	fsubp	%st(0),%st(3)	// i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
   1174 	fxch	%st(1)			// i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1
   1175 	fmulp	%st(0),%st(3)	// i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1
   1176 	fsubrp	%st(0),%st(3)	// i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1
   1177 	movl	fv_v+16(%ecx),%eax
   1178 	andl	$0xFF00,%eax
   1179 	fmulp	%st(0),%st(2)	// i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1
   1180 	addl	C(acolormap),%eax
   1181 	fsubp	%st(0),%st(1)	// (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1)
   1182 	movl	%eax,C(d_pcolormap)
   1183 	fstps	Ltemp
   1184 	movl	Ltemp,%eax
        // integer test on the float's bit pattern: every pattern below
        // 0x80000001 (all non-negative values and -0.0) borrows, i.e. ">= 0"
   1185 	subl	$0x80000001,%eax
   1186 	jc		Lskip
   1187 
   1188 //		if (ptri[i].facesfront)
   1189 //		{
   1190 //			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
   1191 	movl	mtri_facesfront-mtri_size(%ebx,%ebp,),%eax
   1192 	testl	%eax,%eax
   1193 	jz		Lfacesback
   1194 
        // no stack cleanup follows the call; NOTE(review): this relies on the
        // callee popping its three arguments (ret $12) -- confirm
   1195 	pushl	%edx
   1196 	pushl	%esi
   1197 	pushl	%ecx
   1198 	call	C(D_PolysetRecursiveTriangle)
   1199 
   1200 	subl	$(mtri_size),%ebp
   1201 	jnz		Llooptop
   1202 	jmp		Ldone2
   1203 
   1204 //		}
   1205 //		else
   1206 //		{
   1207 Lfacesback:
   1208 
   1209 //			s0 = index0->v[2];
   1210 //			s1 = index1->v[2];
   1211 //			s2 = index2->v[2];
   1212 	movl	fv_v+8(%ecx),%eax
   1213 	pushl	%eax
   1214 	movl	fv_v+8(%esi),%eax
   1215 	pushl	%eax
   1216 	movl	fv_v+8(%edx),%eax
   1217 	pushl	%eax
   1218 	pushl	%ecx				// index0 and index2 are saved across the call;
   1219 	pushl	%edx				// index1 stays in %esi
   1220 
   1221 //			if (index0->flags & ALIAS_ONSEAM)
   1222 //				index0->v[2] += r_affinetridesc.seamfixupX16;
   1223 	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax
   1224 	testl	$(ALIAS_ONSEAM),fv_flags(%ecx)
   1225 	jz		Lp11
   1226 	addl	%eax,fv_v+8(%ecx)
   1227 Lp11:
   1228 
   1229 //			if (index1->flags & ALIAS_ONSEAM)
   1230 //				index1->v[2] += r_affinetridesc.seamfixupX16;
   1231 	testl	$(ALIAS_ONSEAM),fv_flags(%esi)
   1232 	jz		Lp12
   1233 	addl	%eax,fv_v+8(%esi)
   1234 Lp12:
   1235 
   1236 //			if (index2->flags & ALIAS_ONSEAM)
   1237 //				index2->v[2] += r_affinetridesc.seamfixupX16;
   1238 	testl	$(ALIAS_ONSEAM),fv_flags(%edx)
   1239 	jz		Lp13
   1240 	addl	%eax,fv_v+8(%edx)
   1241 Lp13:
   1242 
   1243 //			D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v);
   1244 	pushl	%edx
   1245 	pushl	%esi
   1246 	pushl	%ecx
   1247 	call	C(D_PolysetRecursiveTriangle)
   1248 
   1249 //			index0->v[2] = s0;
   1250 //			index1->v[2] = s1;
   1251 //			index2->v[2] = s2;
   1252 	popl	%edx				// pops mirror the pushes above: index2, index0,
   1253 	popl	%ecx				// then s2, s1, s0
   1254 	popl	%eax
   1255 	movl	%eax,fv_v+8(%edx)
   1256 	popl	%eax
   1257 	movl	%eax,fv_v+8(%esi)
   1258 	popl	%eax
   1259 	movl	%eax,fv_v+8(%ecx)
   1260 
   1261 //		}
   1262 //	}
   1263 Lskip:
   1264 	subl	$(mtri_size),%ebp
   1265 	jnz		Llooptop
   1266 
   1267 Ldone2:
   1268 	popl	%edi				// restore register variables
   1269 	popl	%ebx
   1270 	popl	%esi
   1271 	popl	%ebp				// restore the caller's stack frame
   1272 
   1273 	addl	$(SPAN_SIZE),%esp	// release the span buffer
   1274 
   1275 	ret
   1276 
   1277 
   1278 //----------------------------------------------------------------------
   1279 // Alias model triangle left-edge scanning code
   1280 //----------------------------------------------------------------------
   1281 
   1282 #define height	4+16
   1283 
        // D_PolysetScanLeftEdge
        //
        // Takes one stack argument, height.  height = 4+16 is its offset from
        // %esp after the four pushes below (return address + 4 registers).
        //
        // Writes one spanpackage_t per scan line, starting at
        // d_pedgespanpackage, then steps the left-edge state (d_pdest, d_pz,
        // d_aspancount, ptex, sfrac, tfrac, light, zi) by either the "extra"
        // or the "base" step set, selected by the sign of the updated
        // errorterm.
        //
        // Only the low 16 bits of height are used, and the loop is do/while
        // shaped, so the body always runs at least once.  The count shares
        // %ecx with d_sfrac.
        // NOTE(review): the `orl` that merges them and the sfrac step adds
        // assume the low word of d_sfrac and of the sfrac steps is zero --
        // confirm against the C setup code.
        //
        // d_pdest, d_pz, d_aspancount, errorterm and d_pedgespanpackage are
        // written back to memory on every iteration; ptex, sfrac, tfrac, light
        // and zi live only in registers here and are not stored back to their
        // globals.
   1284 .globl C(D_PolysetScanLeftEdge)
   1285 C(D_PolysetScanLeftEdge):
   1286 	pushl	%ebp				// preserve caller stack frame pointer
   1287 	pushl	%esi				// preserve register variables
   1288 	pushl	%edi
   1289 	pushl	%ebx
   1290 
   1291 	movl	height(%esp),%eax
   1292 	movl	C(d_sfrac),%ecx
   1293 	andl	$0xFFFF,%eax		// keep only the low 16 bits of height
   1294 	movl	C(d_ptex),%ebx
   1295 	orl		%eax,%ecx			// pack the count into the low word of %ecx
   1296 	movl	C(d_pedgespanpackage),%esi
   1297 	movl	C(d_tfrac),%edx
   1298 	movl	C(d_light),%edi
   1299 	movl	C(d_zi),%ebp
   1300 
   1301 // %eax: scratch
   1302 // %ebx: d_ptex
   1303 // %ecx: d_sfrac in high word, count in low word
   1304 // %edx: d_tfrac
   1305 // %esi: d_pedgespanpackage, errorterm, scratch alternately
   1306 // %edi: d_light
   1307 // %ebp: d_zi
   1308 
   1309 //	do
   1310 //	{
   1311 
   1312 LScanLoop:
   1313 
   1314 //		d_pedgespanpackage->ptex = ptex;
   1315 //		d_pedgespanpackage->pdest = d_pdest;
   1316 //		d_pedgespanpackage->pz = d_pz;
   1317 //		d_pedgespanpackage->count = d_aspancount;
   1318 //		d_pedgespanpackage->light = d_light;
   1319 //		d_pedgespanpackage->zi = d_zi;
   1320 //		d_pedgespanpackage->sfrac = d_sfrac << 16;
   1321 //		d_pedgespanpackage->tfrac = d_tfrac << 16;
   1322 	movl	%ebx,spanpackage_t_ptex(%esi)
   1323 	movl	C(d_pdest),%eax
   1324 	movl	%eax,spanpackage_t_pdest(%esi)
   1325 	movl	C(d_pz),%eax
   1326 	movl	%eax,spanpackage_t_pz(%esi)
   1327 	movl	C(d_aspancount),%eax
   1328 	movl	%eax,spanpackage_t_count(%esi)
   1329 	movl	%edi,spanpackage_t_light(%esi)
   1330 	movl	%ebp,spanpackage_t_zi(%esi)
   1331 	movl	%ecx,spanpackage_t_sfrac(%esi)	// stored with the loop count still in the low word
   1332 	movl	%edx,spanpackage_t_tfrac(%esi)
   1333 
   1334 // pretouch the next cache line
   1335 	movb	spanpackage_t_size(%esi),%al	// value is discarded; the read itself is the point
   1336 
   1337 //		d_pedgespanpackage++;
   1338 	addl	$(spanpackage_t_size),%esi
   1339 	movl	C(erroradjustup),%eax
   1340 	movl	%esi,C(d_pedgespanpackage)
   1341 
   1342 //		errorterm += erroradjustup;
   1343 	movl	C(errorterm),%esi
   1344 	addl	%eax,%esi
   1345 	movl	C(d_pdest),%eax		// movl leaves the flags from the addl intact for the js
   1346 
   1347 //		if (errorterm >= 0)
   1348 //		{
   1349 	js		LNoLeftEdgeTurnover
   1350 
   1351 //			errorterm -= erroradjustdown;
   1352 //			d_pdest += d_pdestextrastep;
   1353 	subl	C(erroradjustdown),%esi
   1354 	addl	C(d_pdestextrastep),%eax
   1355 	movl	%esi,C(errorterm)
   1356 	movl	%eax,C(d_pdest)
   1357 
   1358 //			d_pz += d_pzextrastep;
   1359 //			d_aspancount += d_countextrastep;
   1360 //			d_ptex += d_ptexextrastep;
   1361 //			d_sfrac += d_sfracextrastep;
   1362 //			d_ptex += d_sfrac >> 16;
   1363 //			d_sfrac &= 0xFFFF;
   1364 //			d_tfrac += d_tfracextrastep;
   1365 	movl	C(d_pz),%eax
   1366 	movl	C(d_aspancount),%esi
   1367 	addl	C(d_pzextrastep),%eax
   1368 	addl	C(d_sfracextrastep),%ecx
   1369 	adcl	C(d_ptexextrastep),%ebx	// the carry out of sfrac advances ptex by one more
   1370 	addl	C(d_countextrastep),%esi
   1371 	movl	%eax,C(d_pz)
   1372 	movl	C(d_tfracextrastep),%eax
   1373 	movl	%esi,C(d_aspancount)
   1374 	addl	%eax,%edx
   1375 
   1376 //			if (d_tfrac & 0x10000)
   1377 //			{
   1378 	jnc		LSkip1				// carry out of the tfrac add stands in for the 0x10000 test
   1379 
   1380 //				d_ptex += r_affinetridesc.skinwidth;
   1381 //				d_tfrac &= 0xFFFF;
   1382 	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
   1383 
   1384 //			}
   1385 
   1386 LSkip1:
   1387 
   1388 //			d_light += d_lightextrastep;
   1389 //			d_zi += d_ziextrastep;
   1390 	addl	C(d_lightextrastep),%edi
   1391 	addl	C(d_ziextrastep),%ebp
   1392 
   1393 //		}
   1394 	movl	C(d_pedgespanpackage),%esi
   1395 	decl	%ecx				// count lives in the low word of %ecx
   1396 	testl	$0xFFFF,%ecx
   1397 	jnz		LScanLoop
   1398 
   1399 	popl	%ebx
   1400 	popl	%edi
   1401 	popl	%esi
   1402 	popl	%ebp
   1403 	ret
   1404 
   1405 //		else
   1406 //		{
   1407 
   1408 LNoLeftEdgeTurnover:
   1409 	movl	%esi,C(errorterm)
   1410 
   1411 //			d_pdest += d_pdestbasestep;
   1412 	addl	C(d_pdestbasestep),%eax
   1413 	movl	%eax,C(d_pdest)
   1414 
   1415 //			d_pz += d_pzbasestep;
   1416 //			d_aspancount += ubasestep;
   1417 //			d_ptex += d_ptexbasestep;
   1418 //			d_sfrac += d_sfracbasestep;
   1419 //			d_ptex += d_sfrac >> 16;
   1420 //			d_sfrac &= 0xFFFF;
   1421 	movl	C(d_pz),%eax
   1422 	movl	C(d_aspancount),%esi
   1423 	addl	C(d_pzbasestep),%eax
   1424 	addl	C(d_sfracbasestep),%ecx
   1425 	adcl	C(d_ptexbasestep),%ebx	// the carry out of sfrac advances ptex by one more
   1426 	addl	C(ubasestep),%esi
   1427 	movl	%eax,C(d_pz)
   1428 	movl	%esi,C(d_aspancount)
   1429 
   1430 //			d_tfrac += d_tfracbasestep;
   1431 	movl	C(d_tfracbasestep),%esi
   1432 	addl	%esi,%edx
   1433 
   1434 //			if (d_tfrac & 0x10000)
   1435 //			{
   1436 	jnc		LSkip2				// carry out of the tfrac add stands in for the 0x10000 test
   1437 
   1438 //				d_ptex += r_affinetridesc.skinwidth;
   1439 //				d_tfrac &= 0xFFFF;
   1440 	addl	C(r_affinetridesc)+atd_skinwidth,%ebx
   1441 
   1442 //			}
   1443 
   1444 LSkip2:
   1445 
   1446 //			d_light += d_lightbasestep;
   1447 //			d_zi += d_zibasestep;
   1448 	addl	C(d_lightbasestep),%edi
   1449 	addl	C(d_zibasestep),%ebp
   1450 
   1451 //		}
   1452 //	} while (--height);
   1453 	movl	C(d_pedgespanpackage),%esi
   1454 	decl	%ecx				// count lives in the low word of %ecx
   1455 	testl	$0xFFFF,%ecx
   1456 	jnz		LScanLoop
   1457 
   1458 	popl	%ebx
   1459 	popl	%edi
   1460 	popl	%esi
   1461 	popl	%ebp
   1462 	ret
   1463 
   1464 
   1465 //----------------------------------------------------------------------
   1466 // Alias model vertex drawing code
   1467 //----------------------------------------------------------------------
   1468 
   1469 #define fv			4+8
   1470 #define	numverts	8+8
   1471 
        // D_PolysetDrawFinalVerts
        //
        // Takes two stack arguments: fv (pointer to the first vertex) and
        // numverts.  Both offsets include the return address plus the two
        // pushes (%ebp, %ebx) that precede the reads.
        //
        // For each of the numverts vertices, plots a single pixel if the vertex
        // is left of r_refdef.vrectright, above r_refdef.vrectbottom, and
        // passes the z-buffer test.  The z-buffer and d_viewbuffer are updated
        // in place.
        //
        // Register roles inside the loop:
        //   %ebx = current vertex   %ecx = vertices remaining
        //   %eax = v[0]             %esi = v[1]
        //   %edx, %edi = scratch
        //
        // A numverts of zero or less returns immediately, matching the C
        // reference loop quoted below.
   1472 .globl C(D_PolysetDrawFinalVerts)
   1473 C(D_PolysetDrawFinalVerts):
   1474 	pushl	%ebp				// preserve caller stack frame pointer
   1475 	pushl	%ebx
   1476 
   1477 //	int		i, z;
   1478 //	short	*zbuf;
   1479 
   1480 	movl	numverts(%esp),%ecx
   1481 	movl	fv(%esp),%ebx
   1482 
   1483 	pushl	%esi				// preserve register variables
   1484 	pushl	%edi
   1485 
        // the loop below is bottom-tested (decl/jnz), so an empty list has to
        // be rejected up front or it would run with a wrapped counter
	testl	%ecx,%ecx
	jle		LFVDone
        
   1486 LFVLoop:
   1487 
   1488 //	for (i=0 ; i<numverts ; i++, fv++)
   1489 //	{
   1490 //	// valid triangle coordinates for filling can include the bottom and
   1491 //	// right clip edges, due to the fill rule; these shouldn't be drawn
   1492 //		if ((fv->v[0] < r_refdef.vrectright) &&
   1493 //			(fv->v[1] < r_refdef.vrectbottom))
   1494 //		{
   1495 	movl	fv_v+0(%ebx),%eax
   1496 	movl	C(r_refdef)+rd_vrectright,%edx
   1497 	cmpl	%edx,%eax
   1498 	jge		LNextVert
   1499 	movl	fv_v+4(%ebx),%esi
   1500 	movl	C(r_refdef)+rd_vrectbottom,%edx
   1501 	cmpl	%edx,%esi
   1502 	jge		LNextVert
   1503 
   1504 //			zbuf = zspantable[fv->v[1]] + fv->v[0];
   1505 	movl	C(zspantable)(,%esi,4),%edi
   1506 
   1507 //			z = fv->v[5]>>16;
   1508 	movl	fv_v+20(%ebx),%edx
   1509 	shrl	$16,%edx			// only %dx is compared and stored below
   1510 
   1511 //			if (z >= *zbuf)
   1512 //			{
   1513 //				int		pix;
   1514 	cmpw	(%edi,%eax,2),%dx
   1515 	jl		LNextVert
   1516 
   1517 //				*zbuf = z;
   1518 	movw	%dx,(%edi,%eax,2)
   1519 
   1520 //				pix = skintable[fv->v[3]>>16][fv->v[2]>>16];
   1521 	movl	fv_v+12(%ebx),%edi
   1522 	shrl	$16,%edi
   1523 	movl	C(skintable)(,%edi,4),%edi
   1524 	movl	fv_v+8(%ebx),%edx
   1525 	shrl	$16,%edx
   1526 	movb	(%edi,%edx),%dl
   1527 
   1528 //				pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)];
   1529 	movl	fv_v+16(%ebx),%edi
   1530 	andl	$0xFF00,%edi
   1531 	andl	$0x00FF,%edx		// drop the stale upper bits left around the texel in %dl
   1532 	addl	%edx,%edi
   1533 	movl	C(acolormap),%edx
   1534 	movb	(%edx,%edi,1),%dl
   1535 
   1536 //				d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix;
   1537 	movl	C(d_scantable)(,%esi,4),%edi
   1538 	movl	C(d_viewbuffer),%esi
   1539 	addl	%eax,%edi
   1540 	movb	%dl,(%esi,%edi)
   1541 
   1542 //			}
   1543 //		}
   1544 //	}
   1545 LNextVert:
   1546 	addl	$(fv_size),%ebx
   1547 	decl	%ecx
   1548 	jnz		LFVLoop
   1549 
LFVDone:
   1550 	popl	%edi				// restore register variables
   1551 	popl	%esi
   1552 	popl	%ebx
   1553 	popl	%ebp				// restore the caller's stack frame
   1554 	ret
   1555 
   1556 
   1557 //----------------------------------------------------------------------
   1558 // Alias model non-subdivided polygon dispatching code
   1559 //
   1560 // not C-callable because of stack buffer cleanup
   1561 //----------------------------------------------------------------------
   1562 
   1563 .globl C(D_DrawNonSubdiv)
   1564 C(D_DrawNonSubdiv):
        // D_DrawNonSubdiv
        //
        // Takes no stack arguments; everything is read from r_affinetridesc.
        // It is entered by a jump from D_PolysetDraw with SPAN_SIZE bytes
        // already reserved on the stack, and releases them before returning.
        //
        // For every triangle whose integer d_xdenom is negative, copies the
        // three vertices into r_p0 / r_p1 / r_p2, applies the seam fixup to
        // the s coordinate (element 2) of on-seam vertices when the triangle
        // does not face front, stores 1.0/d_xdenom back into d_xdenom as a
        // float, and calls D_PolysetSetEdgeTable then
        // D_RasterizeAliasPolySmooth.
        //
        // %ebp is a byte offset into the triangle array that counts DOWN from
        // numtriangles*mtri_size to 0, so triangles are visited last-to-first
        // and every ptri access is biased by -mtri_size.  %esi is reused as
        // scratch inside the loop, so ptriangles is reloaded at LNextTri.
        //
        // A triangle count of zero or less skips the loop entirely.
   1565 	pushl	%ebp				// preserve caller stack frame pointer
   1566 	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
   1567 	pushl	%ebx
   1568 	shll	$(mtri_shift),%ebp	// triangle count -> byte offset past the last triangle
   1569 	pushl	%esi				// preserve register variables
   1570 	movl	C(r_affinetridesc)+atd_ptriangles,%esi
   1571 	pushl	%edi
   1572 
        // the C loop below runs zero times for lnumtriangles <= 0; without this
        // guard the count-down loop would start by reading ptri[-1]
	testl	%ebp,%ebp
	jle		LNDDone
        
   1573 //	mtriangle_t		*ptri;
   1574 //	finalvert_t		*pfv, *index0, *index1, *index2;
   1575 //	int				i;
   1576 //	int				lnumtriangles;
   1577 
   1578 //	pfv = r_affinetridesc.pfinalverts;
   1579 //	ptri = r_affinetridesc.ptriangles;
   1580 //	lnumtriangles = r_affinetridesc.numtriangles;
   1581 
   1582 LNDLoop:
   1583 
   1584 //	for (i=0 ; i<lnumtriangles ; i++, ptri++)
   1585 //	{
   1586 //		index0 = pfv + ptri->vertindex[0];
   1587 //		index1 = pfv + ptri->vertindex[1];
   1588 //		index2 = pfv + ptri->vertindex[2];
   1589 	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
   1590 	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
   1591 	shll	$(fv_shift),%ecx
   1592 	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
   1593 	shll	$(fv_shift),%edx
   1594 	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
   1595 	shll	$(fv_shift),%ebx
   1596 	addl	%edi,%ecx
   1597 	addl	%edi,%edx
   1598 	addl	%edi,%ebx
   1599 
   1600 //		d_xdenom = (index0->v[1]-index1->v[1]) *
   1601 //				(index0->v[0]-index2->v[0]) -
   1602 //				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
   1603 	movl	fv_v+4(%ecx),%eax
   1604 	movl	fv_v+0(%ecx),%esi
   1605 	subl	fv_v+4(%edx),%eax
   1606 	subl	fv_v+0(%ebx),%esi
   1607 	imull	%esi,%eax
   1608 	movl	fv_v+0(%ecx),%esi
   1609 	movl	fv_v+4(%ecx),%edi
   1610 	subl	fv_v+0(%edx),%esi
   1611 	subl	fv_v+4(%ebx),%edi
   1612 	imull	%esi,%edi
   1613 	subl	%edi,%eax
   1614 
   1615 //		if (d_xdenom >= 0)
   1616 //		{
   1617 //			continue;
   1618 	jns		LNextTri			// sign flag comes from the subl above
   1619 
   1620 //		}
   1621 
   1622 	movl	%eax,C(d_xdenom)
   1623 	fildl	C(d_xdenom)			// start the int->float conversion
   1624 
   1625 //		r_p0[0] = index0->v[0];		// u
   1626 //		r_p0[1] = index0->v[1];		// v
   1627 //		r_p0[2] = index0->v[2];		// s
   1628 //		r_p0[3] = index0->v[3];		// t
   1629 //		r_p0[4] = index0->v[4];		// light
   1630 //		r_p0[5] = index0->v[5];		// iz
   1631 	movl	fv_v+0(%ecx),%eax
   1632 	movl	fv_v+4(%ecx),%esi
   1633 	movl	%eax,C(r_p0)+0
   1634 	movl	%esi,C(r_p0)+4
   1635 	movl	fv_v+8(%ecx),%eax
   1636 	movl	fv_v+12(%ecx),%esi
   1637 	movl	%eax,C(r_p0)+8
   1638 	movl	%esi,C(r_p0)+12
   1639 	movl	fv_v+16(%ecx),%eax
   1640 	movl	fv_v+20(%ecx),%esi
   1641 	movl	%eax,C(r_p0)+16
   1642 	movl	%esi,C(r_p0)+20
   1643 
   1644 	fdivrs	float_1				// st(0) = float_1 / d_xdenom; stored at LFacesFront
   1645 
   1646 //		r_p1[0] = index1->v[0];
   1647 //		r_p1[1] = index1->v[1];
   1648 //		r_p1[2] = index1->v[2];
   1649 //		r_p1[3] = index1->v[3];
   1650 //		r_p1[4] = index1->v[4];
   1651 //		r_p1[5] = index1->v[5];
   1652 	movl	fv_v+0(%edx),%eax
   1653 	movl	fv_v+4(%edx),%esi
   1654 	movl	%eax,C(r_p1)+0
   1655 	movl	%esi,C(r_p1)+4
   1656 	movl	fv_v+8(%edx),%eax
   1657 	movl	fv_v+12(%edx),%esi
   1658 	movl	%eax,C(r_p1)+8
   1659 	movl	%esi,C(r_p1)+12
   1660 	movl	fv_v+16(%edx),%eax
   1661 	movl	fv_v+20(%edx),%esi
   1662 	movl	%eax,C(r_p1)+16
   1663 	movl	%esi,C(r_p1)+20
   1664 
   1665 //		r_p2[0] = index2->v[0];
   1666 //		r_p2[1] = index2->v[1];
   1667 //		r_p2[2] = index2->v[2];
   1668 //		r_p2[3] = index2->v[3];
   1669 //		r_p2[4] = index2->v[4];
   1670 //		r_p2[5] = index2->v[5];
   1671 	movl	fv_v+0(%ebx),%eax
   1672 	movl	fv_v+4(%ebx),%esi
   1673 	movl	%eax,C(r_p2)+0
   1674 	movl	%esi,C(r_p2)+4
   1675 	movl	fv_v+8(%ebx),%eax
   1676 	movl	fv_v+12(%ebx),%esi
   1677 	movl	%eax,C(r_p2)+8
   1678 	movl	%esi,C(r_p2)+12
   1679 	movl	fv_v+16(%ebx),%eax
   1680 	movl	fv_v+20(%ebx),%esi
   1681 	movl	%eax,C(r_p2)+16
   1682 	movl	C(r_affinetridesc)+atd_ptriangles,%edi
   1683 	movl	%esi,C(r_p2)+20
   1684 	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax
   1685 
   1686 //		if (!ptri->facesfront)
   1687 //		{
   1688 	testl	%eax,%eax
   1689 	jnz		LFacesFront
   1690 
   1691 //			if (index0->flags & ALIAS_ONSEAM)
   1692 //				r_p0[2] += r_affinetridesc.seamfixupX16;
   1693 	movl	fv_flags(%ecx),%eax
   1694 	movl	fv_flags(%edx),%esi
   1695 	movl	fv_flags(%ebx),%edi
   1696 	testl	$(ALIAS_ONSEAM),%eax
   1697 	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax	// movl keeps the flags for the jz
   1698 	jz		LOnseamDone0
   1699 	addl	%eax,C(r_p0)+8
   1700 LOnseamDone0:
   1701 
   1702 //			if (index1->flags & ALIAS_ONSEAM)
   1703 // 				r_p1[2] += r_affinetridesc.seamfixupX16;
   1704 	testl	$(ALIAS_ONSEAM),%esi
   1705 	jz		LOnseamDone1
   1706 	addl	%eax,C(r_p1)+8
   1707 LOnseamDone1:
   1708 
   1709 //			if (index2->flags & ALIAS_ONSEAM)
   1710 //				r_p2[2] += r_affinetridesc.seamfixupX16;
   1711 	testl	$(ALIAS_ONSEAM),%edi
   1712 	jz		LOnseamDone2
   1713 	addl	%eax,C(r_p2)+8
   1714 LOnseamDone2:
   1715 
   1716 //		}
   1717 
   1718 LFacesFront:
   1719 
   1720 	fstps	C(d_xdenom)			// d_xdenom now holds the reciprocal as a float
   1721 
   1722 //		D_PolysetSetEdgeTable ();
   1723 //		D_RasterizeAliasPolySmooth ();
   1724 		call	C(D_PolysetSetEdgeTable)
   1725 		call	C(D_RasterizeAliasPolySmooth)
   1726 
   1727 LNextTri:
   1728 		movl	C(r_affinetridesc)+atd_ptriangles,%esi	// %esi was used as scratch above
   1729 		subl	$(mtri_size),%ebp
   1730 		jnz		LNDLoop
   1731 //	}
   1732 
LNDDone:
   1733 	popl	%edi				// restore register variables
   1734 	popl	%esi
   1735 	popl	%ebx
   1736 	popl	%ebp				// restore the caller's stack frame
   1737 
   1738 	addl	$(SPAN_SIZE),%esp	// release the span buffer reserved by D_PolysetDraw
   1739 
   1740 	ret
   1741 
   1742 
   1743 #endif	// id386
   1744 
   1745