1 /* 2 Copyright (C) 1996-1997 Id Software, Inc. 3 4 This program is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License 6 as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 19 */ 20 // 21 // d_draw16.s 22 // x86 assembly-language horizontal 8-bpp span-drawing code, with 16-pixel 23 // subdivision. 24 // 25 26 #include "asm_i386.h" 27 #include "quakeasm.h" 28 #include "asm_draw.h" 29 #include "d_ifacea.h" 30 31 #if id386 32 33 //---------------------------------------------------------------------- 34 // 8-bpp horizontal span drawing code for polygons, with no transparency and 35 // 16-pixel subdivision. 
//
// Assumes there is at least one span in pspans, and that every span
// contains at least one pixel
//----------------------------------------------------------------------

	.data

	.text

// out-of-line, rarely-needed clamping code
//
// Each pair below services one in-line clamp site: the in-line code jumps
// out here when s or t leaves [4096, bbextents|bbextentt], and jumps back
// to the matching LClampReentryN label with the register forced in range.
// (4096 is the low guard value used throughout; bbextents/bbextentt are the
// texture extents. NOTE(review): the exact meaning of the 4096 floor comes
// from the C-side adjust/extent setup — confirm against d_ifacea.h.)

// clamp s (in %esi): jg means "too high" -> bbextents, else negative -> 0
LClampHigh0:
	movl	C(bbextents),%esi
	jmp		LClampReentry0
LClampHighOrLow0:
	jg		LClampHigh0
	xorl	%esi,%esi
	jmp		LClampReentry0

// clamp t (in %edx): same pattern as above, against bbextentt
LClampHigh1:
	movl	C(bbextentt),%edx
	jmp		LClampReentry1
LClampHighOrLow1:
	jg		LClampHigh1
	xorl	%edx,%edx
	jmp		LClampReentry1

// clamp snext (in %ebp) to [4096, bbextents]
LClampLow2:
	movl	$4096,%ebp
	jmp		LClampReentry2
LClampHigh2:
	movl	C(bbextents),%ebp
	jmp		LClampReentry2

// clamp tnext (in %ecx) to [4096, bbextentt]
LClampLow3:
	movl	$4096,%ecx
	jmp		LClampReentry3
LClampHigh3:
	movl	C(bbextentt),%ecx
	jmp		LClampReentry3

// clamp snext (in %eax, last-segment path) to [4096, bbextents]
LClampLow4:
	movl	$4096,%eax
	jmp		LClampReentry4
LClampHigh4:
	movl	C(bbextents),%eax
	jmp		LClampReentry4

// clamp tnext (in %ebx, last-segment path) to [4096, bbextentt]
LClampLow5:
	movl	$4096,%ebx
	jmp		LClampReentry5
LClampHigh5:
	movl	C(bbextentt),%ebx
	jmp		LClampReentry5


// stack offset of the single argument, after the four register pushes below
#define pspans	4+16

//----------------------------------------------------------------------
// void D_DrawSpans16 (espan_t *pspan)   -- cdecl, i386
//
// Draws a linked list of 8-bpp horizontal texture-mapped spans with
// perspective correction computed once per 16 pixels; s and t are
// interpolated linearly (16.16 fixed point) within each 16-pixel segment.
// The expensive FDIV for the *end* of each segment is started early so it
// overlaps the integer pixel-writing code ("FDIV in flight").
//
// Saves/restores %ebp, %edi, %esi, %ebx; %eax, %ecx, %edx and the FP stack
// are clobbered (the three working FP values are popped before returning).
// Reads global state set up by the C side: d_*step*/origin, cacheblock,
// cachewidth, sadjust/tadjust, bbextents/bbextentt, d_viewbuffer,
// d_scantable, reciprocal_table_16, entryvec_table_16.
//----------------------------------------------------------------------
	.align 4
.globl C(D_DrawSpans16)
C(D_DrawSpans16):
	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi
	pushl	%esi				// preserve register variables
	pushl	%ebx

//
// set up scaled-by-16 steps, for 16-long segments; also set up cacheblock
// and span list pointers
//
// TODO: any overlap from rearranging?
	flds	C(d_sdivzstepu)
	fmuls	fp_16
	movl	C(cacheblock),%edx
	flds	C(d_tdivzstepu)
	fmuls	fp_16
	movl	pspans(%esp),%ebx	// point to the first span descriptor
	flds	C(d_zistepu)
	fmuls	fp_16
	movl	%edx,pbase			// pbase = cacheblock
	fstps	zi16stepu			// per-16-pixel steps for 1/z, t/z, s/z
	fstps	tdivz16stepu
	fstps	sdivz16stepu

LSpanLoop:
//
// set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
// initial s and t values
//
// FIXME: pipeline FILD?
	fildl	espan_t_v(%ebx)
	fildl	espan_t_u(%ebx)

	fld		%st(1)			// dv | du | dv
	fmuls	C(d_sdivzstepv)	// dv*d_sdivzstepv | du | dv
	fld		%st(1)			// du | dv*d_sdivzstepv | du | dv
	fmuls	C(d_sdivzstepu)	// du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fld		%st(2)			// du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
	fmuls	C(d_tdivzstepu)	// du*d_tdivzstepu | du*d_sdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu | du*d_tdivzstepu |
							//  dv*d_sdivzstepv | du | dv
	faddp	%st(0),%st(2)	// du*d_tdivzstepu |
							//  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fld		%st(3)			// dv | du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fmuls	C(d_tdivzstepv)	// dv*d_tdivzstepv |
							//  du*d_sdivzstepu + dv*d_sdivzstepv |
							//  du*d_tdivzstepu | du | dv
	fxch	%st(1)			// du*d_sdivzstepu + dv*d_sdivzstepv |
							//  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
	fadds	C(d_sdivzorigin)	// sdivz = d_sdivzorigin + dv*d_sdivzstepv +
							//  du*d_sdivzstepu; stays in %st(2) at end
	fxch	%st(4)			// dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
							//  s/z
	fmuls	C(d_zistepv)	// dv*d_zistepv | dv*d_tdivzstepv |
							//  du*d_tdivzstepu | du | s/z
	fxch	%st(1)			// dv*d_tdivzstepv | dv*d_zistepv |
							//  du*d_tdivzstepu | du | s/z
	faddp	%st(0),%st(2)	// dv*d_zistepv |
							//  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
	fxch	%st(2)			// du | dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fmuls	C(d_zistepu)	// du*d_zistepu |
							//  dv*d_tdivzstepv + du*d_tdivzstepu |
							//  dv*d_zistepv | s/z
	fxch	%st(1)			// dv*d_tdivzstepv + du*d_tdivzstepu |
							//  du*d_zistepu | dv*d_zistepv | s/z
	fadds	C(d_tdivzorigin)	// tdivz = d_tdivzorigin + dv*d_tdivzstepv +
							//  du*d_tdivzstepu; stays in %st(1) at end
	fxch	%st(2)			// dv*d_zistepv | du*d_zistepu | t/z | s/z
	faddp	%st(0),%st(1)	// dv*d_zistepv + du*d_zistepu | t/z | s/z

	flds	fp_64k			// fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
	fxch	%st(1)			// dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
	fadds	C(d_ziorigin)	// zi = d_ziorigin + dv*d_zistepv +
							//  du*d_zistepu; stays in %st(0) at end
							// 1/z | fp_64k | t/z | s/z
//
// calculate and clamp s & t
//
	fdivr	%st(0),%st(1)	// 1/z | z*64k | t/z | s/z

//
// point %edi to the first pixel in the span
//
	movl	C(d_viewbuffer),%ecx
	movl	espan_t_v(%ebx),%eax
	movl	%ebx,pspantemp	// preserve spans pointer

	movl	C(tadjust),%edx
	movl	C(sadjust),%esi
	movl	C(d_scantable)(,%eax,4),%edi	// v * screenwidth
	addl	%ecx,%edi
	movl	espan_t_u(%ebx),%ecx
	addl	%ecx,%edi				// pdest = &pdestspan[scans->u];
	movl	espan_t_count(%ebx),%ecx	// %ecx = pixel count for this span

//
// now start the FDIV for the end of the span
//
	cmpl	$16,%ecx
	ja		LSetupNotLast1

	decl	%ecx
	jz		LCleanup1		// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1

// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	fildl	spancountminus1

	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | spancountminus1
	flds	C(d_zistepu)	// C(d_zistepu) | C(d_tdivzstepu) | spancountminus1
	fmul	%st(2),%st(0)	// C(d_zistepu)*scm1 | C(d_tdivzstepu) | scm1
	fxch	%st(1)			// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(2)			// scm1 | C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_zistepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_sdivzstepu)*scm1 |
							//  C(d_tdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)

	flds	fp_64k
	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight1

LCleanup1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z
	jmp		LFDIVInFlight1

	.align	4
LSetupNotLast1:
// finish up the s and t calcs
	fxch	%st(1)			// z*64k | 1/z | t/z | s/z

	fld		%st(0)			// z*64k | z*64k | 1/z | t/z | s/z
	fmul	%st(4),%st(0)	// s | z*64k | 1/z | t/z | s/z
	fxch	%st(1)			// z*64k | s | 1/z | t/z | s/z
	fmul	%st(3),%st(0)	// t | s | 1/z | t/z | s/z
	fxch	%st(1)			// s | t | 1/z | t/z | s/z
	fistpl	s				// 1/z | t | t/z | s/z
	fistpl	t				// 1/z | t/z | s/z

	// advance s/z, t/z, 1/z by one full 16-pixel segment and start the
	// divide for the segment's far end
	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight1:

	// s = sadjust + s, t = tadjust + t, clamped to [0, extents]
	addl	s,%esi
	addl	t,%edx
	movl	C(bbextents),%ebx
	movl	C(bbextentt),%ebp
	cmpl	%ebx,%esi
	ja		LClampHighOrLow0
LClampReentry0:
	movl	%esi,s
	movl	pbase,%ebx
	shll	$16,%esi
	cmpl	%ebp,%edx
	movl	%esi,sfracf		// low 16 bits of s, left-justified
	ja		LClampHighOrLow1
LClampReentry1:
	movl	%edx,t
	movl	s,%esi			// sfrac = scans->sfrac;
	shll	$16,%edx
	movl	t,%eax			// tfrac = scans->tfrac;
	sarl	$16,%esi
	movl	%edx,tfracf		// low 16 bits of t, left-justified

//
// calculate the texture starting address
//
	sarl	$16,%eax
	movl	C(cachewidth),%edx
	imull	%edx,%eax				// (tfrac >> 16) * cachewidth
	addl	%ebx,%esi
	addl	%eax,%esi				// psource = pbase + (sfrac >> 16) +
									//           ((tfrac >> 16) * cachewidth);
//
// determine whether last span or not
//
	cmpl	$16,%ecx
	jna		LLastSegment

//
// not the last segment; do full 16-wide segment
//
LNotLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there
//

// pick up after the FDIV that was left in flight previously

	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext
	movl	snext,%eax
	movl	tnext,%edx

	movb	(%esi),%bl		// get first source texel
	subl	$16,%ecx		// count off this segments' pixels
	movl	C(sadjust),%ebp
	movl	%ecx,counttemp	// remember count of remaining pixels

	movl	C(tadjust),%ecx
	movb	%bl,(%edi)		// store first dest pixel

	addl	%eax,%ebp
	addl	%edx,%ecx

	movl	C(bbextents),%eax
	movl	C(bbextentt),%edx

	cmpl	$4096,%ebp
	jl		LClampLow2
	cmpl	%eax,%ebp
	ja		LClampHigh2
LClampReentry2:

	cmpl	$4096,%ecx
	jl		LClampLow3
	cmpl	%edx,%ecx
	ja		LClampHigh3
LClampReentry3:

	movl	%ebp,snext
	movl	%ecx,tnext

	subl	s,%ebp			// %ebp = 16.16 s delta across the 16 pixels
	subl	t,%ecx			// %ecx = 16.16 t delta across the 16 pixels

//
// set up advancetable
//
// The per-pixel step is delta/16.  Integer part = delta >> 20
// (i.e. (delta >> 16) >> 4); fractional part is re-justified below with
// shll $12 (i.e. (delta >> 4) << 16 == delta << 12).
//
	movl	%ecx,%eax
	movl	%ebp,%edx
	sarl	$20,%eax			// tstep int part = tdelta >> 20
	jz		LZero				// no whole-row t advance per pixel
	sarl	$20,%edx			// sstep int part = sdelta >> 20
	movl	C(cachewidth),%ebx
	imull	%ebx,%eax			// (tstep int) * cachewidth
	jmp		LSetUp1

LZero:
	sarl	$20,%edx			// sstep int part = sdelta >> 20
	movl	C(cachewidth),%ebx

LSetUp1:

	addl	%edx,%eax			// add in sstep int part:
								//  (tstep int) * cachewidth + (sstep int)
	movl	tfracf,%edx
	movl	%eax,advancetable+4	// advance base in t (no tfrac carry)
	addl	%ebx,%eax			// ((tstep int) + 1) * cachewidth +
								//  (sstep int)
	shll	$12,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$12,%ecx			// left-justify tstep fractional part
	movl	%eax,advancetable	// advance extra in t (tfrac carried)

// Unrolled inner loop, pixels 1..7 of this segment.  Each group:
//   - add tstep frac to tfracf; sbbl turns the carry into %ecx = -1/0,
//     selecting advancetable[0] (carry) or advancetable[1] (no carry);
//   - add sstep frac to sfracf; adcl folds its carry (+1 texel) into the
//     source pointer along with the selected table entry.
	movl	%ecx,tstep
	addl	%ecx,%edx		// advance tfrac fractional part by tstep frac

	sbbl	%ecx,%ecx		// turn tstep carry into -1 (0 if none)
	addl	%ebp,%ebx		// advance sfrac fractional part by sstep frac
	adcl	advancetable+4(,%ecx,4),%esi	// point to next source texel

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	(%esi),%al
	addl	%ebp,%ebx
	movb	%al,1(%edi)
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi


//
// start FDIV for end of next segment in flight, so it can overlap
//
	movl	counttemp,%ecx
	cmpl	$16,%ecx		// more than one segment after this?
	ja		LSetupNotLast2	// yes

	decl	%ecx
	jz		LFDIVInFlight2	// if only one pixel, no need to start an FDIV
	movl	%ecx,spancountminus1
	fildl	spancountminus1

	flds	C(d_zistepu)	// C(d_zistepu) | spancountminus1
	fmul	%st(1),%st(0)	// C(d_zistepu)*scm1 | scm1
	flds	C(d_tdivzstepu)	// C(d_tdivzstepu) | C(d_zistepu)*scm1 | scm1
	fmul	%st(2),%st(0)	// C(d_tdivzstepu)*scm1 | C(d_zistepu)*scm1 | scm1
	fxch	%st(1)			// C(d_zistepu)*scm1 | C(d_tdivzstepu)*scm1 | scm1
	faddp	%st(0),%st(3)	// C(d_tdivzstepu)*scm1 | scm1
	fxch	%st(1)			// scm1 | C(d_tdivzstepu)*scm1
	fmuls	C(d_sdivzstepu)	// C(d_sdivzstepu)*scm1 | C(d_tdivzstepu)*scm1
	fxch	%st(1)			// C(d_tdivzstepu)*scm1 | C(d_sdivzstepu)*scm1
	faddp	%st(0),%st(3)	// C(d_sdivzstepu)*scm1
	flds	fp_64k			// 64k | C(d_sdivzstepu)*scm1
	fxch	%st(1)			// C(d_sdivzstepu)*scm1 | 64k
	faddp	%st(0),%st(4)	// 64k

	fdiv	%st(1),%st(0)	// this is what we've gone to all this trouble to
							//  overlap
	jmp		LFDIVInFlight2

	.align	4
LSetupNotLast2:
	fadds	zi16stepu
	fxch	%st(2)
	fadds	sdivz16stepu
	fxch	%st(2)
	flds	tdivz16stepu
	faddp	%st(0),%st(2)
	flds	fp_64k
	fdiv	%st(1),%st(0)	// z = 1/1/z
							// this is what we've gone to all this trouble to
							//  overlap
LFDIVInFlight2:
	movl	%ecx,counttemp

// pixels 8..15 of the segment (same carry-driven stepping as above)
	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,14(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi

	addl	$16,%edi
	movl	%edx,tfracf
	movl	snext,%edx
	movl	%ebx,sfracf
	movl	tnext,%ebx
	movl	%edx,s
	movl	%ebx,t

	movl	counttemp,%ecx		// retrieve count

//
// determine whether last span or not
//
	cmpl	$16,%ecx			// are there multiple segments remaining?
	movb	%al,-1(%edi)		// write pixel 15 of the finished segment
	ja		LNotLastSegment		// yes

//
// last segment of scan
//
LLastSegment:

//
// advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
// get there. The number of pixels left is variable, and we want to land on the
// last pixel, not step one past it, so we can't run into arithmetic problems
//
	testl	%ecx,%ecx
	jz		LNoSteps		// just draw the last pixel and we're done

// pick up after the FDIV that was left in flight previously


	fld		%st(0)			// duplicate it
	fmul	%st(4),%st(0)	// s = s/z * z
	fxch	%st(1)
	fmul	%st(3),%st(0)	// t = t/z * z
	fxch	%st(1)
	fistpl	snext
	fistpl	tnext

	movb	(%esi),%al		// load first texel in segment
	movl	C(tadjust),%ebx
	movb	%al,(%edi)		// store first pixel in segment
	movl	C(sadjust),%eax

	addl	snext,%eax
	addl	tnext,%ebx

	movl	C(bbextents),%ebp
	movl	C(bbextentt),%edx

	cmpl	$4096,%eax
	jl		LClampLow4
	cmpl	%ebp,%eax
	ja		LClampHigh4
LClampReentry4:
	movl	%eax,snext

	cmpl	$4096,%ebx
	jl		LClampLow5
	cmpl	%edx,%ebx
	ja		LClampHigh5
LClampReentry5:

	cmpl	$1,%ecx			// don't bother
	je		LOnlyOneStep	// if two pixels in segment, there's only one step,
							//  of the segment length
	subl	s,%eax
	subl	t,%ebx

	addl	%eax,%eax		// convert to 15.17 format so multiply by 1.31
	addl	%ebx,%ebx		//  reciprocal yields 16.48

	imull	reciprocal_table_16-8(,%ecx,4)	// sstep = (snext - s) /
											//  (spancount-1)
	movl	%edx,%ebp

	movl	%ebx,%eax
	imull	reciprocal_table_16-8(,%ecx,4)	// tstep = (tnext - t) /
											//  (spancount-1)
LSetEntryvec:
//
// set up advancetable
//
// Entered with %ebp = 16.16 sstep, %edx = 16.16 tstep for this short
// segment; %ecx = spancount, which indexes the entry-vector jump table.
//
	movl	entryvec_table_16(,%ecx,4),%ebx
	movl	%edx,%eax
	movl	%ebx,jumptemp		// entry point into code for RET later
	movl	%ebp,%ecx
	sarl	$16,%edx			// tstep >>= 16;
	movl	C(cachewidth),%ebx
	sarl	$16,%ecx			// sstep >>= 16;
	imull	%ebx,%edx

	addl	%ecx,%edx			// add in sstep
								//  (tstep >> 16) * cachewidth + (sstep >> 16);
	movl	tfracf,%ecx
	movl	%edx,advancetable+4	// advance base in t
	addl	%ebx,%edx			// ((tstep >> 16) + 1) * cachewidth +
								//  (sstep >> 16);
	shll	$16,%ebp			// left-justify sstep fractional part
	movl	sfracf,%ebx
	shll	$16,%eax			// left-justify tstep fractional part
	movl	%edx,advancetable	// advance extra in t

	movl	%eax,tstep
	movl	%ecx,%edx
	addl	%eax,%edx
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	jmp		*jumptemp			// jump to the number-of-pixels handler

//----------------------------------------

LNoSteps:
	movb	(%esi),%al		// load first texel in segment
	subl	$15,%edi		// adjust for hardwired offset
	jmp		LEndSpan


LOnlyOneStep:
	// two-pixel segment: the single step is the whole remaining delta
	subl	s,%eax
	subl	t,%ebx
	movl	%eax,%ebp
	movl	%ebx,%edx
	jmp		LSetEntryvec

//----------------------------------------

// Entry points for 2..16 remaining pixels.  Each stub biases %edi so the
// shared fall-through chain below can use its hardwired 1..15(%edi)
// offsets, performs the partial first step, then jumps into the chain at
// the matching LEntryN_16 label.

.globl	Entry2_16, Entry3_16, Entry4_16, Entry5_16
.globl	Entry6_16, Entry7_16, Entry8_16, Entry9_16
.globl	Entry10_16, Entry11_16, Entry12_16, Entry13_16
.globl	Entry14_16, Entry15_16, Entry16_16

Entry2_16:
	subl	$14,%edi		// adjust for hardwired offsets
	movb	(%esi),%al
	jmp		LEntry2_16

//----------------------------------------

Entry3_16:
	subl	$13,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	jmp		LEntry3_16

//----------------------------------------

Entry4_16:
	subl	$12,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry4_16

//----------------------------------------

Entry5_16:
	subl	$11,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry5_16

//----------------------------------------

Entry6_16:
	subl	$10,%edi		// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry6_16

//----------------------------------------

Entry7_16:
	subl	$9,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry7_16

//----------------------------------------

Entry8_16:
	subl	$8,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry8_16

//----------------------------------------

Entry9_16:
	subl	$7,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry9_16

//----------------------------------------

Entry10_16:
	subl	$6,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry10_16

//----------------------------------------

Entry11_16:
	subl	$5,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry11_16

//----------------------------------------

Entry12_16:
	subl	$4,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry12_16

//----------------------------------------

Entry13_16:
	subl	$3,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry13_16

//----------------------------------------

Entry14_16:
	subl	$2,%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry14_16

//----------------------------------------

Entry15_16:
	decl	%edi			// adjust for hardwired offsets
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
	jmp		LEntry15_16

//----------------------------------------

// Shared fall-through chain: 16-pixel case enters at the top; shorter
// segments enter at LEntryN_16 via the stubs above.  The final texel is
// written at LEndSpan (15(%edi)).
Entry16_16:
	addl	%eax,%edx
	movb	(%esi),%al
	sbbl	%ecx,%ecx
	addl	%ebp,%ebx
	adcl	advancetable+4(,%ecx,4),%esi

	addl	tstep,%edx
	sbbl	%ecx,%ecx
	movb	%al,1(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry15_16:
	sbbl	%ecx,%ecx
	movb	%al,2(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry14_16:
	sbbl	%ecx,%ecx
	movb	%al,3(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry13_16:
	sbbl	%ecx,%ecx
	movb	%al,4(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry12_16:
	sbbl	%ecx,%ecx
	movb	%al,5(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry11_16:
	sbbl	%ecx,%ecx
	movb	%al,6(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry10_16:
	sbbl	%ecx,%ecx
	movb	%al,7(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry9_16:
	sbbl	%ecx,%ecx
	movb	%al,8(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry8_16:
	sbbl	%ecx,%ecx
	movb	%al,9(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry7_16:
	sbbl	%ecx,%ecx
	movb	%al,10(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry6_16:
	sbbl	%ecx,%ecx
	movb	%al,11(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry5_16:
	sbbl	%ecx,%ecx
	movb	%al,12(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
	addl	tstep,%edx
LEntry4_16:
	sbbl	%ecx,%ecx
	movb	%al,13(%edi)
	addl	%ebp,%ebx
	movb	(%esi),%al
	adcl	advancetable+4(,%ecx,4),%esi
LEntry3_16:
	movb	%al,14(%edi)
	movb	(%esi),%al
LEntry2_16:

LEndSpan:

//
// clear s/z, t/z, 1/z from FP stack
//
	fstp	%st(0)
	fstp	%st(0)
	fstp	%st(0)

	movl	pspantemp,%ebx				// restore spans pointer
	movl	espan_t_pnext(%ebx),%ebx	// point to next span
	testl	%ebx,%ebx					// any more spans?
	movb	%al,15(%edi)				// write last texel of this span
	jnz		LSpanLoop					// more spans

	popl	%ebx				// restore register variables
	popl	%esi
	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	ret

#endif	// id386