/*
Copyright (C) 1996-1997 Id Software, Inc.

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

*/
//
// d_parta.s
// x86 assembly-language 8-bpp particle-drawing code.
//

#include "asm_i386.h"
#include "quakeasm.h"
#include "d_ifacea.h"
#include "asm_draw.h"

#if id386

//----------------------------------------------------------------------
// 8-bpp particle drawing code.
//----------------------------------------------------------------------

//FIXME: comments, full optimization

//----------------------------------------------------------------------
// 8-bpp particle queueing code.
//----------------------------------------------------------------------

	.text

//----------------------------------------------------------------------
// D_DrawParticle
//
// Syntax: AT&T (GNU as), 32-bit x86 + x87, passed through the C
// preprocessor.
//
// Transforms one particle into view space, projects it to screen
// coordinates (u, v), rejects it if it is nearer than
// float_particle_z_clip or outside the d_vrect* rectangle, then draws a
// z-buffered block of pixels whose side length (pix) is derived from
// 1/z.
//
// In:    one stack argument, read at P(%esp) after the three register
//        pushes: pointer to the particle. Only the pt_org (3 floats)
//        and pt_color (float) fields are read.
// Out:   nothing is set up as a return value; %eax holds scratch data.
// Saved: %ebp, %edi, %ebx always; %esi in the 2x2/3x3/4x4 paths.
// Clobb: %eax, %ecx, %edx, flags, and the memory scratch cells DP_u,
//        DP_v, DP_Color, DP_Pix and izi.
// x87:   uses up to seven register-stack slots; every exit path pops
//        exactly what it pushed.
//
// Register roles once the pixel addresses have been computed:
//   %edi = pdest, address of the 8-bpp destination pixel
//   %edx = pz, address of the matching 16-bit z-buffer entry
//   %bp  = low 16 bits of izi (1/z * 0x8000)
//   %ch  = low byte of DP_Color
//   %eax = pix (clamped to d_pix_min..d_pix_max)
//   %ebx / %esi = screenwidth / d_zrowbytes (fixed-size paths only)
//----------------------------------------------------------------------

// Stack offset of the argument: 12 bytes of saved registers plus the
// 4-byte return address.
#define P	12+4

	.align 4
.globl C(D_DrawParticle)
C(D_DrawParticle):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi				// preserve register variables
	pushl	%ebx

	movl	P(%esp),%edi		// %edi = particle pointer

// FIXME: better FP overlap in general here

// transform point
//	VectorSubtract (p->org, r_origin, local);
	flds	C(r_origin)
	fsubrs	pt_org(%edi)		// local[0] = org[0] - r_origin[0]
	flds	pt_org+4(%edi)
	fsubs	C(r_origin)+4
	flds	pt_org+8(%edi)
	fsubs	C(r_origin)+8
	fxch	%st(2)				// local[0] | local[1] | local[2]

//	transformed[2] = DotProduct(local, r_ppn);
	flds	C(r_ppn)			// r_ppn[0] | local[0] | local[1] | local[2]
	fmul	%st(1),%st(0)		// dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+4			// r_ppn[1] | dot0 | local[0] | local[1] | local[2]
	fmul	%st(3),%st(0)		// dot1 | dot0 | local[0] | local[1] | local[2]
	flds	C(r_ppn)+8			// r_ppn[2] | dot1 | dot0 | local[0] |
								//  local[1] | local[2]
	fmul	%st(5),%st(0)		// dot2 | dot1 | dot0 | local[0] | local[1] | local[2]
	fxch	%st(2)				// dot0 | dot1 | dot2 | local[0] | local[1] | local[2]
	faddp	%st(0),%st(1)		// dot0 + dot1 | dot2 | local[0] | local[1] |
								//  local[2]
	faddp	%st(0),%st(1)		// z | local[0] | local[1] | local[2]
	fld		%st(0)				// z | z | local[0] | local[1] |
								//  local[2]
	fdivrs	float_1				// 1/z | z | local[0] | local[1] | local[2]
	fxch	%st(1)				// z | 1/z | local[0] | local[1] | local[2]

//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;
	fcomps	float_particle_z_clip	// 1/z | local[0] | local[1] | local[2]
	fxch	%st(3)					// local[2] | local[0] | local[1] | 1/z

// the start of the r_pup dot product is issued before the compare result
// is examined; on rejection those values are discarded by LPop6AndDone
	flds	C(r_pup)			// r_pup[0] | local[2] | local[0] | local[1] | 1/z
	fmul	%st(2),%st(0)		// dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+4			// r_pup[1] | dot0 | local[2] | local[0] |
								//  local[1] | 1/z

	fnstsw	%ax
	testb	$1,%ah				// bit 0 of %ah is x87 condition flag C0: set
								//  when z < clip (or the compare was unordered)
	jnz		LPop6AndDone		// six values are on the x87 stack here

//	transformed[1] = DotProduct(local, r_pup);
	fmul	%st(4),%st(0)		// dot1 | dot0 | local[2] | local[0] | local[1] | 1/z
	flds	C(r_pup)+8			// r_pup[2] | dot1 | dot0 | local[2] |
								//  local[0] | local[1] | 1/z
	fmul	%st(3),%st(0)		// dot2 | dot1 | dot0 | local[2] | local[0] |
								//  local[1] | 1/z
	fxch	%st(2)				// dot0 | dot1 | dot2 | local[2] | local[0] |
								//  local[1] | 1/z
	faddp	%st(0),%st(1)		// dot0 + dot1 | dot2 | local[2] | local[0] |
								//  local[1] | 1/z
	faddp	%st(0),%st(1)		// y | local[2] | local[0] | local[1] | 1/z
	fxch	%st(3)				// local[1] | local[2] | local[0] | y | 1/z

//	transformed[0] = DotProduct(local, r_pright);
	fmuls	C(r_pright)+4		// dot1 | local[2] | local[0] | y | 1/z
	fxch	%st(2)				// local[0] | local[2] | dot1 | y | 1/z
	fmuls	C(r_pright)			// dot0 | local[2] | dot1 | y | 1/z
	fxch	%st(1)				// local[2] | dot0 | dot1 | y | 1/z
	fmuls	C(r_pright)+8		// dot2 | dot0 | dot1 | y | 1/z
	fxch	%st(2)				// dot1 | dot0 | dot2 | y | 1/z
	faddp	%st(0),%st(1)		// dot1 + dot0 | dot2 | y | 1/z

	faddp	%st(0),%st(1)		// x | y | 1/z
	fxch	%st(1)				// y | x | 1/z

// project the point
	fmul	%st(2),%st(0)		// y/z | x | 1/z
	fxch	%st(1)				// x | y/z | 1/z
	fmul	%st(2),%st(0)		// x/z | y/z | 1/z
	fxch	%st(1)				// y/z | x/z | 1/z
	fsubrs	C(ycenter)			// v | x/z | 1/z	(v = ycenter - y/z)
	fxch	%st(1)				// x/z | v | 1/z
	fadds	C(xcenter)			// u | v | 1/z		(u = xcenter + x/z)
// FIXME: preadjust xcenter and ycenter
	fxch	%st(1)				// v | u | 1/z
	fadds	float_point5		// v | u | 1/z
	fxch	%st(1)				// u | v | 1/z
	fadds	float_point5		// u | v | 1/z
	fxch	%st(2)				// 1/z | v | u
	fmuls	DP_32768			// 1/z * 0x8000 | v | u
	fxch	%st(2)				// u | v | 1/z * 0x8000

// FIXME: use Terje's fp->int trick here?
// FIXME: check we're getting proper rounding here
	fistpl	DP_u				// v | 1/z * 0x8000
	fistpl	DP_v				// 1/z * 0x8000

	movl	DP_u,%eax			// %eax = u
	movl	DP_v,%edx			// %edx = v

// if ((v > d_vrectbottom_particle) ||
//	 (u > d_vrectright_particle) ||
//	 (v < d_vrecty) ||
//	 (u < d_vrectx))
// {
//	 continue;
// }

	movl	C(d_vrectbottom_particle),%ebx
	movl	C(d_vrectright_particle),%ecx
	cmpl	%ebx,%edx
	jg		LPop1AndDone		// only 1/z * 0x8000 is left on the x87 stack
	cmpl	%ecx,%eax
	jg		LPop1AndDone
	movl	C(d_vrecty),%ebx
	movl	C(d_vrectx),%ecx
	cmpl	%ebx,%edx
	jl		LPop1AndDone

	cmpl	%ecx,%eax
	jl		LPop1AndDone

	flds	pt_color(%edi)		// color | 1/z * 0x8000
// FIXME: use Terje's fast fp->int trick?
	fistpl	DP_Color			// 1/z * 0x8000

	movl	C(d_viewbuffer),%ebx

	addl	%eax,%ebx			// %ebx = d_viewbuffer + u
	movl	C(d_scantable)(,%edx,4),%edi	// point to the pixel
											//  (%edi = d_scantable[v]; the
											//  particle pointer is dead now)

	imull	C(d_zrowbytes),%edx	// point to the z pixel

	leal	(%edx,%eax,2),%edx	// + u * 2: z entries are 16 bits wide
	movl	C(d_pzbuffer),%eax

	fistpl	izi					// x87 stack is empty from here on

	addl	%ebx,%edi			// %edi = pdest
	addl	%eax,%edx			// %edx = pz

// pix = izi >> d_pix_shift;

	movl	izi,%eax
	movl	C(d_pix_shift),%ecx
	shrl	%cl,%eax
	movl	izi,%ebp			// only %bp is used, as the 16-bit z value

// if (pix < d_pix_min)
//		pix = d_pix_min;
// else if (pix > d_pix_max)
//  pix = d_pix_max;

	movl	C(d_pix_min),%ebx
	movl	C(d_pix_max),%ecx
	cmpl	%ebx,%eax
	jnl		LTestPixMax
	movl	%ebx,%eax
	jmp		LTestDone

LTestPixMax:
	cmpl	%ecx,%eax
	jng		LTestDone
	movl	%ecx,%eax
LTestDone:

// Guard: a pix of zero (or less) means no pixels, exactly as the C loops
// shown further down would draw none. Without this check pix == 0 would
// index one slot before DP_EntryTable, and the decl/jnz loops under
// LDefault would wrap around instead of terminating. The x87 stack is
// already empty here, so LDone is a valid exit.
	testl	%eax,%eax
	jng		LDone

	movb	DP_Color,%ch		// %ch = colour byte for the stores below

// the unrolled paths draw square blocks, so they are used only when there
// is no y aspect scaling and the size is at most 4
	movl	C(d_y_aspect_shift),%ebx
	testl	%ebx,%ebx
	jnz		LDefault

	cmpl	$4,%eax
	ja		LDefault

// %eax is 1..4 here; the -4 bias makes pix == 1 select the first table
// slot. DP_EntryTable is defined outside this file -- presumably it holds
// the addresses of DP_1x1..DP_4x4 in that order; verify at its definition.
// NOTE(review): current GNU as warns about an indirect jmp written
// without a leading `*'; left untouched in case other tools consume this
// syntax -- confirm before changing.
	jmp		DP_EntryTable-4(,%eax,4)

// Every fixed-size cell below is the same three-step pattern:
//	if (pz[i] <= izi) { pz[i] = izi; pdest[i] = color; }
// done as a signed 16-bit compare of the z entry against %bp.

// 1x1
.globl	DP_1x1
DP_1x1:
	cmpw	%bp,(%edx)		// just one pixel to do
	jg		LDone
	movw	%bp,(%edx)
	movb	%ch,(%edi)
	jmp		LDone

// 2x2
.globl	DP_2x2
DP_2x2:
	pushl	%esi			// %esi is not saved by the common prologue
	movl	C(screenwidth),%ebx	// %ebx = byte stride between pixel rows
	movl	C(d_zrowbytes),%esi	// %esi = byte stride between z rows

	cmpw	%bp,(%edx)
	jg		L2x2_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L2x2_1:
	cmpw	%bp,2(%edx)
	jg		L2x2_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L2x2_2:
	cmpw	%bp,(%edx,%esi,1)
	jg		L2x2_3
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L2x2_3:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L2x2_4
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L2x2_4:

	popl	%esi
	jmp		LDone

// 3x3
.globl	DP_3x3
DP_3x3:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L3x3_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L3x3_1:
	cmpw	%bp,2(%edx)
	jg		L3x3_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L3x3_2:
	cmpw	%bp,4(%edx)
	jg		L3x3_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L3x3_3:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L3x3_4
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L3x3_4:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L3x3_5
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L3x3_5:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L3x3_6
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L3x3_6:

// row 2
	cmpw	%bp,(%edx,%esi,2)
	jg		L3x3_7
	movw	%bp,(%edx,%esi,2)
	movb	%ch,(%edi,%ebx,2)
L3x3_7:
	cmpw	%bp,2(%edx,%esi,2)
	jg		L3x3_8
	movw	%bp,2(%edx,%esi,2)
	movb	%ch,1(%edi,%ebx,2)
L3x3_8:
	cmpw	%bp,4(%edx,%esi,2)
	jg		L3x3_9
	movw	%bp,4(%edx,%esi,2)
	movb	%ch,2(%edi,%ebx,2)
L3x3_9:

	popl	%esi
	jmp		LDone


// 4x4
.globl	DP_4x4
DP_4x4:
	pushl	%esi
	movl	C(screenwidth),%ebx
	movl	C(d_zrowbytes),%esi

// row 0
	cmpw	%bp,(%edx)
	jg		L4x4_1
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_1:
	cmpw	%bp,2(%edx)
	jg		L4x4_2
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_2:
	cmpw	%bp,4(%edx)
	jg		L4x4_3
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_3:
	cmpw	%bp,6(%edx)
	jg		L4x4_4
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_4:

// row 1
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_5
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_5:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_6
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_6:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_7
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_7:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_8
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_8:

// advance both pointers two rows so rows 2 and 3 reuse the addressing
// forms of rows 0 and 1
	leal	(%edx,%esi,2),%edx
	leal	(%edi,%ebx,2),%edi

// row 2
	cmpw	%bp,(%edx)
	jg		L4x4_9
	movw	%bp,(%edx)
	movb	%ch,(%edi)
L4x4_9:
	cmpw	%bp,2(%edx)
	jg		L4x4_10
	movw	%bp,2(%edx)
	movb	%ch,1(%edi)
L4x4_10:
	cmpw	%bp,4(%edx)
	jg		L4x4_11
	movw	%bp,4(%edx)
	movb	%ch,2(%edi)
L4x4_11:
	cmpw	%bp,6(%edx)
	jg		L4x4_12
	movw	%bp,6(%edx)
	movb	%ch,3(%edi)
L4x4_12:

// row 3
	cmpw	%bp,(%edx,%esi,1)
	jg		L4x4_13
	movw	%bp,(%edx,%esi,1)
	movb	%ch,(%edi,%ebx,1)
L4x4_13:
	cmpw	%bp,2(%edx,%esi,1)
	jg		L4x4_14
	movw	%bp,2(%edx,%esi,1)
	movb	%ch,1(%edi,%ebx,1)
L4x4_14:
	cmpw	%bp,4(%edx,%esi,1)
	jg		L4x4_15
	movw	%bp,4(%edx,%esi,1)
	movb	%ch,2(%edi,%ebx,1)
L4x4_15:
	cmpw	%bp,6(%edx,%esi,1)
	jg		L4x4_16
	movw	%bp,6(%edx,%esi,1)
	movb	%ch,3(%edi,%ebx,1)
L4x4_16:

	popl	%esi
	jmp		LDone

// default case, handling any size particle
LDefault:

// count = pix << d_y_aspect_shift;

	movl	%eax,%ebx
	movl	%eax,DP_Pix			// saved so each row can reload its width
	movb	C(d_y_aspect_shift),%cl	// writes %cl only; colour stays in %ch
	shll	%cl,%ebx			// %ebx = count (rows)

// for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
// {
//	for (i=0 ; i<pix ; i++)
//	{
//		if (pz[i] <= izi)
//		{
//			pz[i] = izi;
//			pdest[i] = color;
//		}
//	}
// }

LGenRowLoop:
	movl	DP_Pix,%eax

// %eax runs from pix down to 1, so the -2 / -1 displacements address
// element %eax-1: each row is filled from its last column to its first
LGenColLoop:
	cmpw	%bp,-2(%edx,%eax,2)
	jg		LGSkip
	movw	%bp,-2(%edx,%eax,2)
	movb	%ch,-1(%edi,%eax,1)
LGSkip:
	decl	%eax				// --pix
	jnz		LGenColLoop

	addl	C(d_zrowbytes),%edx
	addl	C(screenwidth),%edi

	decl	%ebx				// --count
	jnz		LGenRowLoop

// common exit: undo the three pushes from the prologue
LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

// rejection exits: empty the x87 stack before leaving. The z-clip test
// arrives with six live values, the screen-rectangle tests with one.
LPop6AndDone:
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)
LPop1AndDone:
	fstp	%st(0)
	jmp		LDone

#endif	// id386