/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
     20 //
     21 // d_parta.s
     22 // x86 assembly-language 8-bpp particle-drawing code.
     23 //
     24 
     25 #include "asm_i386.h"
     26 #include "quakeasm.h"
     27 #include "d_ifacea.h"
     28 #include "asm_draw.h"
     29 
     30 #if	id386
     31 
     32 //----------------------------------------------------------------------
     33 // 8-bpp particle drawing code.
     34 //----------------------------------------------------------------------
     35 
     36 //FIXME: comments, full optimization
     37 
//----------------------------------------------------------------------
// 8-bpp particle drawing code: D_DrawParticle transforms, clips and
// draws one particle immediately (nothing is queued).
//----------------------------------------------------------------------
     41 
     42 	.text
     43 
     44 #define P	12+4
     45 
     46 	.align 4
     47 .globl C(D_DrawParticle)
     48 C(D_DrawParticle):
     49 	pushl	%ebp				// preserve caller's stack frame
     50 	pushl	%edi				// preserve register variables
     51 	pushl	%ebx
     52 
     53 	movl	P(%esp),%edi
     54 
     55 // FIXME: better FP overlap in general here
     56 
     57 // transform point
     58 //	VectorSubtract (p->org, r_origin, local);
     59 	flds	C(r_origin)
     60 	fsubrs	pt_org(%edi)
     61 	flds	pt_org+4(%edi)
     62 	fsubs	C(r_origin)+4
     63 	flds	pt_org+8(%edi)
     64 	fsubs	C(r_origin)+8
     65 	fxch	%st(2)			// local[0] | local[1] | local[2]
     66 
     67 //	transformed[2] = DotProduct(local, r_ppn);
     68 	flds	C(r_ppn)		// r_ppn[0] | local[0] | local[1] | local[2]
     69 	fmul	%st(1),%st(0)	// dot0 | local[0] | local[1] | local[2]
     70 	flds	C(r_ppn)+4	// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
     71 	fmul	%st(3),%st(0)	// dot1 | dot0 | local[0] | local[1] | local[2]
     72 	flds	C(r_ppn)+8	// r_ppn[2] | dot1 | dot0 | local[0] |
     73 						//  local[1] | local[2]
     74 	fmul	%st(5),%st(0)	// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
     75 	fxch	%st(2)		// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
     76 	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[0] | local[1] |
     77 						  //  local[2]
     78 	faddp	%st(0),%st(1) // z | local[0] | local[1] | local[2]
     79 	fld		%st(0)		// z | z | local[0] | local[1] |
     80 						//  local[2]
     81 	fdivrs	float_1		// 1/z | z | local[0] | local[1] | local[2]
     82 	fxch	%st(1)		// z | 1/z | local[0] | local[1] | local[2]
     83 
     84 //	if (transformed[2] < PARTICLE_Z_CLIP)
     85 //		return;
     86 	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
     87 	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z
     88 
     89 	flds	C(r_pup)	// r_pup[0] | local[2] | local[0] | local[1] | 1/z
     90 	fmul	%st(2),%st(0)	// dot0 | local[2] | local[0] | local[1] | 1/z
     91 	flds	C(r_pup)+4	// r_pup[1] | dot0 | local[2] | local[0] |
     92 						//  local[1] | 1/z
     93 
     94 	fnstsw	%ax
     95 	testb	$1,%ah
     96 	jnz		LPop6AndDone
     97 
     98 //	transformed[1] = DotProduct(local, r_pup);
     99 	fmul	%st(4),%st(0)	// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
    100 	flds	C(r_pup)+8	// r_pup[2] | dot1 | dot0 | local[2] |
    101 						//  local[0] | local[1] | 1/z
    102 	fmul	%st(3),%st(0)	// dot2 | dot1 | dot0 | local[2] | local[0] |
    103 						//  local[1] | 1/z
    104 	fxch	%st(2)		// dot0 | dot1 | dot2 | local[2] | local[0] |
    105 						//  local[1] | 1/z
    106 	faddp	%st(0),%st(1) // dot0 + dot1 | dot2 | local[2] | local[0] |
    107 						//  local[1] | 1/z
    108 	faddp	%st(0),%st(1) // y | local[2] | local[0] | local[1] | 1/z
    109 	fxch	%st(3)		// local[1] | local[2] | local[0] | y | 1/z
    110 
    111 //	transformed[0] = DotProduct(local, r_pright);
    112 	fmuls	C(r_pright)+4	// dot1 | local[2] | local[0] | y | 1/z
    113 	fxch	%st(2)		// local[0] | local[2] | dot1 | y | 1/z
    114 	fmuls	C(r_pright)	// dot0 | local[2] | dot1 | y | 1/z
    115 	fxch	%st(1)		// local[2] | dot0 | dot1 | y | 1/z
    116 	fmuls	C(r_pright)+8	// dot2 | dot0 | dot1 | y | 1/z
    117 	fxch	%st(2)		// dot1 | dot0 | dot2 | y | 1/z
    118 	faddp	%st(0),%st(1) // dot1 + dot0 | dot2 | y | 1/z
    119 
    120 	faddp	%st(0),%st(1)	// x | y | 1/z
    121 	fxch	%st(1)			// y | x | 1/z
    122 
    123 // project the point
    124 	fmul	%st(2),%st(0)	// y/z | x | 1/z
    125 	fxch	%st(1)			// x | y/z | 1/z
    126 	fmul	%st(2),%st(0)	// x/z | y/z | 1/z
    127 	fxch	%st(1)			// y/z | x/z | 1/z
    128 	fsubrs	C(ycenter)		// v | x/z | 1/z
    129 	fxch	%st(1)			// x/z | v | 1/z
    130 	fadds	C(xcenter)		// u | v | 1/z
    131 // FIXME: preadjust xcenter and ycenter
    132 	fxch	%st(1)			// v | u | 1/z
    133 	fadds	float_point5	// v | u | 1/z
    134 	fxch	%st(1)			// u | v | 1/z
    135 	fadds	float_point5	// u | v | 1/z
    136 	fxch	%st(2)			// 1/z | v | u
    137 	fmuls	DP_32768		// 1/z * 0x8000 | v | u
    138 	fxch	%st(2)			// u | v | 1/z * 0x8000
    139 
    140 // FIXME: use Terje's fp->int trick here?
    141 // FIXME: check we're getting proper rounding here
    142 	fistpl	DP_u			// v | 1/z * 0x8000
    143 	fistpl	DP_v			// 1/z * 0x8000
    144 
    145 	movl	DP_u,%eax
    146 	movl	DP_v,%edx
    147 
    148 // if ((v > d_vrectbottom_particle) ||
    149 // 	(u > d_vrectright_particle) ||
    150 // 	(v < d_vrecty) ||
    151 // 	(u < d_vrectx))
    152 // {
    153 // 	continue;
    154 // }
    155 
    156 	movl	C(d_vrectbottom_particle),%ebx
    157 	movl	C(d_vrectright_particle),%ecx
    158 	cmpl	%ebx,%edx
    159 	jg		LPop1AndDone
    160 	cmpl	%ecx,%eax
    161 	jg		LPop1AndDone
    162 	movl	C(d_vrecty),%ebx
    163 	movl	C(d_vrectx),%ecx
    164 	cmpl	%ebx,%edx
    165 	jl		LPop1AndDone
    166 
    167 	cmpl	%ecx,%eax
    168 	jl		LPop1AndDone
    169 
    170 	flds	pt_color(%edi)	// color | 1/z * 0x8000
    171 // FIXME: use Terje's fast fp->int trick?
    172 	fistpl	DP_Color		// 1/z * 0x8000
    173 
    174 	movl	C(d_viewbuffer),%ebx
    175 
    176 	addl	%eax,%ebx
    177 	movl	C(d_scantable)(,%edx,4),%edi		// point to the pixel
    178 
    179 	imull	C(d_zrowbytes),%edx		// point to the z pixel
    180 
    181 	leal	(%edx,%eax,2),%edx
    182 	movl	C(d_pzbuffer),%eax
    183 
    184 	fistpl	izi
    185 
    186 	addl	%ebx,%edi
    187 	addl	%eax,%edx
    188 
    189 // pix = izi >> d_pix_shift;
    190 
    191 	movl	izi,%eax
    192 	movl	C(d_pix_shift),%ecx
    193 	shrl	%cl,%eax
    194 	movl	izi,%ebp
    195 
    196 // if (pix < d_pix_min)
    197 // 		pix = d_pix_min;
    198 // else if (pix > d_pix_max)
    199 //  	pix = d_pix_max;
    200 
    201 	movl	C(d_pix_min),%ebx
    202 	movl	C(d_pix_max),%ecx
    203 	cmpl	%ebx,%eax
    204 	jnl		LTestPixMax
    205 	movl	%ebx,%eax
    206 	jmp		LTestDone
    207 
    208 LTestPixMax:
    209 	cmpl	%ecx,%eax
    210 	jng		LTestDone
    211 	movl	%ecx,%eax
    212 LTestDone:
    213 
    214 	movb	DP_Color,%ch
    215 
    216 	movl	C(d_y_aspect_shift),%ebx
    217 	testl	%ebx,%ebx
    218 	jnz		LDefault
    219 
    220 	cmpl	$4,%eax
    221 	ja		LDefault
    222 
    223 	jmp		DP_EntryTable-4(,%eax,4)
    224 
    225 // 1x1
    226 .globl	DP_1x1
    227 DP_1x1:
    228 	cmpw	%bp,(%edx)		// just one pixel to do
    229 	jg		LDone
    230 	movw	%bp,(%edx)
    231 	movb	%ch,(%edi)
    232 	jmp		LDone
    233 
    234 // 2x2
    235 .globl	DP_2x2
    236 DP_2x2:
    237 	pushl	%esi
    238 	movl	C(screenwidth),%ebx
    239 	movl	C(d_zrowbytes),%esi
    240 
    241 	cmpw	%bp,(%edx)
    242 	jg		L2x2_1
    243 	movw	%bp,(%edx)
    244 	movb	%ch,(%edi)
    245 L2x2_1:
    246 	cmpw	%bp,2(%edx)
    247 	jg		L2x2_2
    248 	movw	%bp,2(%edx)
    249 	movb	%ch,1(%edi)
    250 L2x2_2:
    251 	cmpw	%bp,(%edx,%esi,1)
    252 	jg		L2x2_3
    253 	movw	%bp,(%edx,%esi,1)
    254 	movb	%ch,(%edi,%ebx,1)
    255 L2x2_3:
    256 	cmpw	%bp,2(%edx,%esi,1)
    257 	jg		L2x2_4
    258 	movw	%bp,2(%edx,%esi,1)
    259 	movb	%ch,1(%edi,%ebx,1)
    260 L2x2_4:
    261 
    262 	popl	%esi
    263 	jmp		LDone
    264 
    265 // 3x3
    266 .globl	DP_3x3
    267 DP_3x3:
    268 	pushl	%esi
    269 	movl	C(screenwidth),%ebx
    270 	movl	C(d_zrowbytes),%esi
    271 
    272 	cmpw	%bp,(%edx)
    273 	jg		L3x3_1
    274 	movw	%bp,(%edx)
    275 	movb	%ch,(%edi)
    276 L3x3_1:
    277 	cmpw	%bp,2(%edx)
    278 	jg		L3x3_2
    279 	movw	%bp,2(%edx)
    280 	movb	%ch,1(%edi)
    281 L3x3_2:
    282 	cmpw	%bp,4(%edx)
    283 	jg		L3x3_3
    284 	movw	%bp,4(%edx)
    285 	movb	%ch,2(%edi)
    286 L3x3_3:
    287 
    288 	cmpw	%bp,(%edx,%esi,1)
    289 	jg		L3x3_4
    290 	movw	%bp,(%edx,%esi,1)
    291 	movb	%ch,(%edi,%ebx,1)
    292 L3x3_4:
    293 	cmpw	%bp,2(%edx,%esi,1)
    294 	jg		L3x3_5
    295 	movw	%bp,2(%edx,%esi,1)
    296 	movb	%ch,1(%edi,%ebx,1)
    297 L3x3_5:
    298 	cmpw	%bp,4(%edx,%esi,1)
    299 	jg		L3x3_6
    300 	movw	%bp,4(%edx,%esi,1)
    301 	movb	%ch,2(%edi,%ebx,1)
    302 L3x3_6:
    303 
    304 	cmpw	%bp,(%edx,%esi,2)
    305 	jg		L3x3_7
    306 	movw	%bp,(%edx,%esi,2)
    307 	movb	%ch,(%edi,%ebx,2)
    308 L3x3_7:
    309 	cmpw	%bp,2(%edx,%esi,2)
    310 	jg		L3x3_8
    311 	movw	%bp,2(%edx,%esi,2)
    312 	movb	%ch,1(%edi,%ebx,2)
    313 L3x3_8:
    314 	cmpw	%bp,4(%edx,%esi,2)
    315 	jg		L3x3_9
    316 	movw	%bp,4(%edx,%esi,2)
    317 	movb	%ch,2(%edi,%ebx,2)
    318 L3x3_9:
    319 
    320 	popl	%esi
    321 	jmp		LDone
    322 
    323 
    324 // 4x4
    325 .globl	DP_4x4
    326 DP_4x4:
    327 	pushl	%esi
    328 	movl	C(screenwidth),%ebx
    329 	movl	C(d_zrowbytes),%esi
    330 
    331 	cmpw	%bp,(%edx)
    332 	jg		L4x4_1
    333 	movw	%bp,(%edx)
    334 	movb	%ch,(%edi)
    335 L4x4_1:
    336 	cmpw	%bp,2(%edx)
    337 	jg		L4x4_2
    338 	movw	%bp,2(%edx)
    339 	movb	%ch,1(%edi)
    340 L4x4_2:
    341 	cmpw	%bp,4(%edx)
    342 	jg		L4x4_3
    343 	movw	%bp,4(%edx)
    344 	movb	%ch,2(%edi)
    345 L4x4_3:
    346 	cmpw	%bp,6(%edx)
    347 	jg		L4x4_4
    348 	movw	%bp,6(%edx)
    349 	movb	%ch,3(%edi)
    350 L4x4_4:
    351 
    352 	cmpw	%bp,(%edx,%esi,1)
    353 	jg		L4x4_5
    354 	movw	%bp,(%edx,%esi,1)
    355 	movb	%ch,(%edi,%ebx,1)
    356 L4x4_5:
    357 	cmpw	%bp,2(%edx,%esi,1)
    358 	jg		L4x4_6
    359 	movw	%bp,2(%edx,%esi,1)
    360 	movb	%ch,1(%edi,%ebx,1)
    361 L4x4_6:
    362 	cmpw	%bp,4(%edx,%esi,1)
    363 	jg		L4x4_7
    364 	movw	%bp,4(%edx,%esi,1)
    365 	movb	%ch,2(%edi,%ebx,1)
    366 L4x4_7:
    367 	cmpw	%bp,6(%edx,%esi,1)
    368 	jg		L4x4_8
    369 	movw	%bp,6(%edx,%esi,1)
    370 	movb	%ch,3(%edi,%ebx,1)
    371 L4x4_8:
    372 
    373 	leal	(%edx,%esi,2),%edx
    374 	leal	(%edi,%ebx,2),%edi
    375 
    376 	cmpw	%bp,(%edx)
    377 	jg		L4x4_9
    378 	movw	%bp,(%edx)
    379 	movb	%ch,(%edi)
    380 L4x4_9:
    381 	cmpw	%bp,2(%edx)
    382 	jg		L4x4_10
    383 	movw	%bp,2(%edx)
    384 	movb	%ch,1(%edi)
    385 L4x4_10:
    386 	cmpw	%bp,4(%edx)
    387 	jg		L4x4_11
    388 	movw	%bp,4(%edx)
    389 	movb	%ch,2(%edi)
    390 L4x4_11:
    391 	cmpw	%bp,6(%edx)
    392 	jg		L4x4_12
    393 	movw	%bp,6(%edx)
    394 	movb	%ch,3(%edi)
    395 L4x4_12:
    396 
    397 	cmpw	%bp,(%edx,%esi,1)
    398 	jg		L4x4_13
    399 	movw	%bp,(%edx,%esi,1)
    400 	movb	%ch,(%edi,%ebx,1)
    401 L4x4_13:
    402 	cmpw	%bp,2(%edx,%esi,1)
    403 	jg		L4x4_14
    404 	movw	%bp,2(%edx,%esi,1)
    405 	movb	%ch,1(%edi,%ebx,1)
    406 L4x4_14:
    407 	cmpw	%bp,4(%edx,%esi,1)
    408 	jg		L4x4_15
    409 	movw	%bp,4(%edx,%esi,1)
    410 	movb	%ch,2(%edi,%ebx,1)
    411 L4x4_15:
    412 	cmpw	%bp,6(%edx,%esi,1)
    413 	jg		L4x4_16
    414 	movw	%bp,6(%edx,%esi,1)
    415 	movb	%ch,3(%edi,%ebx,1)
    416 L4x4_16:
    417 
    418 	popl	%esi
    419 	jmp		LDone
    420 
    421 // default case, handling any size particle
    422 LDefault:
    423 
    424 // count = pix << d_y_aspect_shift;
    425 
    426 	movl	%eax,%ebx
    427 	movl	%eax,DP_Pix
    428 	movb	C(d_y_aspect_shift),%cl
    429 	shll	%cl,%ebx
    430 
    431 // for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
    432 // {
    433 // 	for (i=0 ; i<pix ; i++)
    434 // 	{
    435 // 		if (pz[i] <= izi)
    436 // 		{
    437 // 			pz[i] = izi;
    438 // 			pdest[i] = color;
    439 // 		}
    440 // 	}
    441 // }
    442 
    443 LGenRowLoop:
    444 	movl	DP_Pix,%eax
    445 
    446 LGenColLoop:
    447 	cmpw	%bp,-2(%edx,%eax,2)
    448 	jg		LGSkip
    449 	movw	%bp,-2(%edx,%eax,2)
    450 	movb	%ch,-1(%edi,%eax,1)
    451 LGSkip:
    452 	decl	%eax			// --pix
    453 	jnz		LGenColLoop
    454 
    455 	addl	C(d_zrowbytes),%edx
    456 	addl	C(screenwidth),%edi
    457 
    458 	decl	%ebx			// --count
    459 	jnz		LGenRowLoop
    460 
    461 LDone:
    462 	popl	%ebx				// restore register variables
    463 	popl	%edi
    464 	popl	%ebp				// restore the caller's stack frame
    465 	ret
    466 
    467 LPop6AndDone:
    468 	fstp	%st(0)
    469 	fstp	%st(0)
    470 	fstp	%st(0)
    471 	fstp	%st(0)
    472 	fstp	%st(0)
    473 LPop1AndDone:
    474 	fstp	%st(0)
    475 	jmp		LDone
    476 
    477 #endif	// id386
    478