1 /* 2 Copyright (C) 1996-1997 Id Software, Inc. 3 4 This program is free software; you can redistribute it and/or 5 modify it under the terms of the GNU General Public License 6 as published by the Free Software Foundation; either version 2 7 of the License, or (at your option) any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 13 See the GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program; if not, write to the Free Software 17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 18 19 */ 20 // 21 // d_polysa.s 22 // x86 assembly-language polygon model drawing code 23 // 24 25 #include "asm_i386.h" 26 #include "quakeasm.h" 27 #include "asm_draw.h" 28 #include "d_ifacea.h" 29 30 #if id386 31 32 // !!! if this is changed, it must be changed in d_polyse.c too !!! 
#define DPS_MAXSPANS	MAXHEIGHT+1
						// 1 extra for spanpackage that marks end

// Size in bytes of the on-stack span buffer.  The symbolic form is kept for
// reference; the literal form below is the one actually in use.
//#define	SPAN_SIZE	(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
#define SPAN_SIZE (1024+1+1+1)*32


	.data

	.align	4

// Edge deltas that do not fit on the x87 stack while gradients are being
// computed; written once per triangle, then used as memory operands.
p10_minus_p20:	.single		0
p01_minus_p21:	.single		0

// Scratch cells; not referenced in the part of the file reviewed here.
temp0:			.single		0
temp1:			.single		0
Ltemp:			.single		0

// Entry points into the unrolled 8-pixel span loop, indexed by
// (-count) & 7 (see the dispatch at the end of the span setup code).
aff8entryvec_table:	.long	LDraw8, LDraw7, LDraw6, LDraw5
					.long	LDraw4, LDraw3, LDraw2, LDraw1

// r_zistepx rotated by 16 bits (high word moved to the low word).
lzistepx:		.long	0


	.text

#ifndef NeXT
	.extern C(D_PolysetSetEdgeTable)
	.extern C(D_RasterizeAliasPolySmooth)
#endif

//----------------------------------------------------------------------
// affine triangle gradient calculation code
//----------------------------------------------------------------------
//
// D_PolysetCalcGradients (skinwidth)
//
// In:	4(%esp)			skinwidth (only read by the final imull)
//		r_p0, r_p1, r_p2	integer vertex records; fields [0],[1] give
//						the edge deltas, [4] feeds r_lstep*, [2] feeds
//						r_sstep*, [3] feeds r_tstep*, [5] feeds r_zistep*
//		d_xdenom		single-precision value used as xstepdenominv
// Out:	r_lstepx/y, r_sstepx/y, r_tstepx/y, r_zistepx/y,
//		a_sstepxfrac, a_tstepxfrac, a_ststepxwhole
// Uses:	%eax, %ecx, %edx, flags.  The x87 stack returns to its entry
//		depth, but eight registers are live at the deepest point, so the
//		routine has to be entered with an empty x87 stack.  The control
//		word is left loaded from single_cw.
//
// The instruction order is kept exactly as found; it interleaves
// independent operations with fxch and should not be reshuffled casually.
//
// NOTE(review): the results written beside fsubp/fsubrp follow the original
// annotations.  AT&T-syntax assemblers have a documented quirk in how the
// register forms of fsub/fsubr are named - confirm against the assembler
// in use before touching these lines.
//
// Abbreviations used in the stack comments (top of stack is leftmost):
//		p00 = r_p0[0]-r_p2[0]		p01 = r_p0[1]-r_p2[1]
//		p10 = r_p1[0]-r_p2[0]		p11 = r_p1[1]-r_p2[1]
//		xinv = xstepdenominv		yinv = ystepdenominv = -xinv
//		t0 = r_p0[n]-r_p2[n]		t1 = r_p1[n]-r_p2[n]
//		X = t1*p01 - t0*p11			Y = t1*p00 - t0*p10
//		xstep = X*xinv, ystep = Y*yinv, for each attribute n

#define skinwidth	4+0

.globl C(D_PolysetCalcGradients)
C(D_PolysetCalcGradients):

//
// Edge deltas.
//
//	p00_minus_p20 = r_p0[0] - r_p2[0];
//	p01_minus_p21 = r_p0[1] - r_p2[1];
//	p10_minus_p20 = r_p1[0] - r_p2[0];
//	p11_minus_p21 = r_p1[1] - r_p2[1];
//
//	xstepdenominv = 1.0 / (p10_minus_p20 * p01_minus_p21 -
//			     p00_minus_p20 * p11_minus_p21);
//	ystepdenominv = -xstepdenominv;
//
// The reciprocal is not computed here: it is loaded from d_xdenom.
//
	fildl	C(r_p0)+0		// r_p0[0]
	fildl	C(r_p2)+0		// r_p2[0] | r_p0[0]
	fildl	C(r_p0)+4		// r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p2)+4		// r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+0		// r_p1[0] | r_p2[1] | r_p0[1] | r_p2[0] | r_p0[0]
	fildl	C(r_p1)+4		// r_p1[1] | r_p1[0] | r_p2[1] | r_p0[1] |
							//  r_p2[0] | r_p0[0]
	fxch	%st(3)			// r_p0[1] | r_p1[0] | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fsub	%st(2),%st(0)	// p01 | r_p1[0] | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fxch	%st(1)			// r_p1[0] | p01 | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fsub	%st(4),%st(0)	// p10 | p01 | r_p2[1] | r_p1[1] |
							//  r_p2[0] | r_p0[0]
	fxch	%st(5)			// r_p0[0] | p01 | r_p2[1] | r_p1[1] |
							//  r_p2[0] | p10
	fsubp	%st(0),%st(4)	// p01 | r_p2[1] | r_p1[1] | p00 | p10
	fxch	%st(2)			// r_p1[1] | r_p2[1] | p01 | p00 | p10
	fsubp	%st(0),%st(1)	// p11 | p01 | p00 | p10
	fxch	%st(1)			// p01 | p11 | p00 | p10
	flds	C(d_xdenom)		// d_xdenom | p01 | p11 | p00 | p10
	fxch	%st(4)			// p10 | p01 | p11 | p00 | d_xdenom
	fstps	p10_minus_p20	// p01 | p11 | p00 | d_xdenom
	fstps	p01_minus_p21	// p11 | p00 | xinv   (d_xdenom is xinv)
	fxch	%st(2)			// xinv | p00 | p11

//
// Light gradients, field [4].
//
//// ceil () for light so positive steps are exaggerated, negative steps
//// diminished, pushing us away from underflow toward overflow. Underflow is
//// very visible, overflow is very unlikely, because of ambient lighting
//	t0 = r_p0[4] - r_p2[4];
//	t1 = r_p1[4] - r_p2[4];
//
	fildl	C(r_p2)+16		// r_p2[4] | xinv | p00 | p11
	fildl	C(r_p0)+16		// r_p0[4] | r_p2[4] | xinv | p00 | p11
	fildl	C(r_p1)+16		// r_p1[4] | r_p0[4] | r_p2[4] | xinv | p00 | p11
	fxch	%st(2)			// r_p2[4] | r_p0[4] | r_p1[4] | xinv | p00 | p11
	fld		%st(0)			// r_p2[4] | r_p2[4] | r_p0[4] | r_p1[4] |
							//  xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// r_p2[4] | t0 | r_p1[4] | xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// t0 | t1 | xinv | p00 | p11

//	r_lstepx = (int)
//			ceil((t1 * p01_minus_p21 - t0 * p11_minus_p21) * xstepdenominv);
//	r_lstepy = (int)
//			ceil((t1 * p00_minus_p20 - t0 * p10_minus_p20) * ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | xinv | p00 | p11
	fmul	%st(5),%st(0)	// t0*p11 | t0 | t1 | xinv | p00 | p11
	fxch	%st(2)			// t1 | t0 | t0*p11 | xinv | p00 | p11
	fld		%st(0)			// t1 | t1 | t0 | t0*p11 | xinv | p00 | p11
	fmuls	p01_minus_p21	// t1*p01 | t1 | t0 | t0*p11 | xinv | p00 | p11
	fxch	%st(2)			// t0 | t1 | t1*p01 | t0*p11 | xinv | p00 | p11
	fmuls	p10_minus_p20	// t0*p10 | t1 | t1*p01 | t0*p11 |
							//  xinv | p00 | p11
	fxch	%st(1)			// t1 | t0*p10 | t1*p01 | t0*p11 |
							//  xinv | p00 | p11
	fmul	%st(5),%st(0)	// t1*p00 | t0*p10 | t1*p01 | t0*p11 |
							//  xinv | p00 | p11
	fxch	%st(2)			// t1*p01 | t0*p10 | t1*p00 | t0*p11 |
							//  xinv | p00 | p11
	fsubp	%st(0),%st(3)	// t0*p10 | t1*p00 | X | xinv | p00 | p11
	fsubrp	%st(0),%st(1)	// Y | X | xinv | p00 | p11
	fld		%st(2)			// xinv | Y | X | xinv | p00 | p11
	fmuls	float_minus_1	// yinv | Y | X | xinv | p00 | p11
	fxch	%st(2)			// X | Y | yinv | xinv | p00 | p11
	fmul	%st(3),%st(0)	// X*xinv | Y | yinv | xinv | p00 | p11
	fxch	%st(1)			// Y | X*xinv | yinv | xinv | p00 | p11
	fmul	%st(2),%st(0)	// Y*yinv | X*xinv | yinv | xinv | p00 | p11
	fldcw	ceil_cw			// control word for the ceil() above (ceil_cw
							//  is defined outside this file)
	fistpl	C(r_lstepy)		// r_lstepx | yinv | xinv | p00 | p11
	fistpl	C(r_lstepx)		// yinv | xinv | p00 | p11
	fldcw	single_cw		// back to the control word used for the
							//  remaining integer stores

// yinv now stays on the stack for the remaining attributes, so every
// %st(n) reference below is one deeper than in the light section.

//
// s gradients, field [2].
//
//	t0 = r_p0[2] - r_p2[2];
//	t1 = r_p1[2] - r_p2[2];
//
	fildl	C(r_p2)+8		// r_p2[2] | yinv | xinv | p00 | p11
	fildl	C(r_p0)+8		// r_p0[2] | r_p2[2] | yinv | xinv | p00 | p11
	fildl	C(r_p1)+8		// r_p1[2] | r_p0[2] | r_p2[2] | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// r_p2[2] | r_p0[2] | r_p1[2] | yinv |
							//  xinv | p00 | p11
	fld		%st(0)			// r_p2[2] | r_p2[2] | r_p0[2] | r_p1[2] |
							//  yinv | xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// r_p2[2] | t0 | r_p1[2] | yinv |
							//  xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | p00 | p11

//	r_sstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_sstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | p00 | p11
	fmul	%st(6),%st(0)	// t0*p11 | t0 | t1 | yinv | xinv | p00 | p11
	fxch	%st(2)			// t1 | t0 | t0*p11 | yinv | xinv | p00 | p11
	fld		%st(0)			// t1 | t1 | t0 | t0*p11 | yinv |
							//  xinv | p00 | p11   (all 8 registers live)
	fmuls	p01_minus_p21	// t1*p01 | t1 | t0 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// t0 | t1 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fmuls	p10_minus_p20	// t0*p10 | t1 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(1)			// t1 | t0*p10 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fmul	%st(6),%st(0)	// t1*p00 | t0*p10 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// t1*p01 | t0*p10 | t1*p00 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fsubp	%st(0),%st(3)	// t0*p10 | t1*p00 | X | yinv | xinv | p00 | p11
	fsubrp	%st(0),%st(1)	// Y | X | yinv | xinv | p00 | p11
	fmul	%st(2),%st(0)	// Y*yinv | X | yinv | xinv | p00 | p11
	fxch	%st(1)			// X | Y*yinv | yinv | xinv | p00 | p11
	fmul	%st(3),%st(0)	// X*xinv | Y*yinv | yinv | xinv | p00 | p11
	fxch	%st(1)			// Y*yinv | X*xinv | yinv | xinv | p00 | p11
	fistpl	C(r_sstepy)		// r_sstepx | yinv | xinv | p00 | p11
	fistpl	C(r_sstepx)		// yinv | xinv | p00 | p11

//
// t gradients, field [3].  Same sequence as for s.
//
//	t0 = r_p0[3] - r_p2[3];
//	t1 = r_p1[3] - r_p2[3];
//
	fildl	C(r_p2)+12		// r_p2[3] | yinv | xinv | p00 | p11
	fildl	C(r_p0)+12		// r_p0[3] | r_p2[3] | yinv | xinv | p00 | p11
	fildl	C(r_p1)+12		// r_p1[3] | r_p0[3] | r_p2[3] | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// r_p2[3] | r_p0[3] | r_p1[3] | yinv |
							//  xinv | p00 | p11
	fld		%st(0)			// r_p2[3] | r_p2[3] | r_p0[3] | r_p1[3] |
							//  yinv | xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// r_p2[3] | t0 | r_p1[3] | yinv |
							//  xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | p00 | p11

//	r_tstepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_tstepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | p00 | p11
	fmul	%st(6),%st(0)	// t0*p11 | t0 | t1 | yinv | xinv | p00 | p11
	fxch	%st(2)			// t1 | t0 | t0*p11 | yinv | xinv | p00 | p11
	fld		%st(0)			// t1 | t1 | t0 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fmuls	p01_minus_p21	// t1*p01 | t1 | t0 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// t0 | t1 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fmuls	p10_minus_p20	// t0*p10 | t1 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(1)			// t1 | t0*p10 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fmul	%st(6),%st(0)	// t1*p00 | t0*p10 | t1*p01 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// t1*p01 | t0*p10 | t1*p00 | t0*p11 | yinv |
							//  xinv | p00 | p11
	fsubp	%st(0),%st(3)	// t0*p10 | t1*p00 | X | yinv | xinv | p00 | p11
	fsubrp	%st(0),%st(1)	// Y | X | yinv | xinv | p00 | p11
	fmul	%st(2),%st(0)	// Y*yinv | X | yinv | xinv | p00 | p11
	fxch	%st(1)			// X | Y*yinv | yinv | xinv | p00 | p11
	fmul	%st(3),%st(0)	// X*xinv | Y*yinv | yinv | xinv | p00 | p11
	fxch	%st(1)			// Y*yinv | X*xinv | yinv | xinv | p00 | p11
	fistpl	C(r_tstepy)		// r_tstepx | yinv | xinv | p00 | p11
	fistpl	C(r_tstepx)		// yinv | xinv | p00 | p11

//
// 1/z gradients, field [5].  This is the last attribute, so the popping
// multiply forms consume p11, p00, xinv and yinv in place and the stack
// unwinds to its entry depth.
//
//	t0 = r_p0[5] - r_p2[5];
//	t1 = r_p1[5] - r_p2[5];
//
	fildl	C(r_p2)+20		// r_p2[5] | yinv | xinv | p00 | p11
	fildl	C(r_p0)+20		// r_p0[5] | r_p2[5] | yinv | xinv | p00 | p11
	fildl	C(r_p1)+20		// r_p1[5] | r_p0[5] | r_p2[5] | yinv |
							//  xinv | p00 | p11
	fxch	%st(2)			// r_p2[5] | r_p0[5] | r_p1[5] | yinv |
							//  xinv | p00 | p11
	fld		%st(0)			// r_p2[5] | r_p2[5] | r_p0[5] | r_p1[5] |
							//  yinv | xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// r_p2[5] | t0 | r_p1[5] | yinv |
							//  xinv | p00 | p11
	fsubrp	%st(0),%st(2)	// t0 | t1 | yinv | xinv | p00 | p11

//	r_zistepx = (int)((t1 * p01_minus_p21 - t0 * p11_minus_p21) *
//			xstepdenominv);
//	r_zistepy = (int)((t1 * p00_minus_p20 - t0 * p10_minus_p20) *
//			ystepdenominv);

	fld		%st(0)			// t0 | t0 | t1 | yinv | xinv | p00 | p11
	fmulp	%st(0),%st(6)	// t0 | t1 | yinv | xinv | p00 | t0*p11
	fxch	%st(1)			// t1 | t0 | yinv | xinv | p00 | t0*p11
	fld		%st(0)			// t1 | t1 | t0 | yinv | xinv | p00 | t0*p11
	fmuls	p01_minus_p21	// t1*p01 | t1 | t0 | yinv | xinv | p00 | t0*p11
	fxch	%st(2)			// t0 | t1 | t1*p01 | yinv | xinv | p00 | t0*p11
	fmuls	p10_minus_p20	// t0*p10 | t1 | t1*p01 | yinv | xinv | p00 |
							//  t0*p11
	fxch	%st(1)			// t1 | t0*p10 | t1*p01 | yinv | xinv | p00 |
							//  t0*p11
	fmulp	%st(0),%st(5)	// t0*p10 | t1*p01 | yinv | xinv | t1*p00 |
							//  t0*p11
	fxch	%st(5)			// t0*p11 | t1*p01 | yinv | xinv | t1*p00 |
							//  t0*p10
	fsubrp	%st(0),%st(1)	// X | yinv | xinv | t1*p00 | t0*p10
	fxch	%st(3)			// t1*p00 | yinv | xinv | X | t0*p10
	fsubp	%st(0),%st(4)	// yinv | xinv | X | Y
	fxch	%st(1)			// xinv | yinv | X | Y
	fmulp	%st(0),%st(2)	// yinv | X*xinv | Y
	fmulp	%st(0),%st(2)	// X*xinv | Y*yinv
	fistpl	C(r_zistepx)	// Y*yinv
	fistpl	C(r_zistepy)	// (stack back at entry depth)

//
// Fixed-point split of the per-pixel s/t steps.
//
//	a_sstepxfrac = r_sstepx << 16;
//	a_tstepxfrac = r_tstepx << 16;
//
//	a_ststepxwhole = r_affinetridesc.skinwidth * (r_tstepx >> 16) +
//			(r_sstepx >> 16);
//
	movl	C(r_sstepx),%eax
	movl	C(r_tstepx),%edx
	shll	$16,%eax			// fractional 16 bits moved to the high word
	shll	$16,%edx
	movl	%eax,C(a_sstepxfrac)
	movl	%edx,C(a_tstepxfrac)

	movl	C(r_sstepx),%ecx
	movl	C(r_tstepx),%eax
	sarl	$16,%ecx			// whole part of the s step
	sarl	$16,%eax			// whole part of the t step
	imull	skinwidth(%esp)		// %eax *= skinwidth argument (%edx clobbered)
	addl	%ecx,%eax
	movl	%eax,C(a_ststepxwhole)

	ret


//----------------------------------------------------------------------
// recursive subdivision affine triangle drawing code
//
// not C-callable because of stdcall return
//----------------------------------------------------------------------
//
// D_PolysetRecursiveTriangle (lp1, lp2, lp3)
//
// In:	three pointers on the stack, each to an int[6] vertex record:
//			[0],[1]	screen position ([1] selects the row through
//					zspantable / d_scantable, [0] is the offset in it)
//			[2],[3]	skin coordinates, whole part in the high 16 bits
//			[5]		depth, compared/stored as its high 16 bits
//		Field [4] is neither read nor written by this routine.
// Out:	nothing in registers; pixels go to d_viewbuffer and the z-buffer.
// Uses:	%eax, %ecx, %edx, flags.  %ebp, %esi, %edi and %ebx are saved
//		and restored.  The routine pops its own 12 bytes of arguments
//		(ret $12), so callers must not clean the stack.
//
// Each edge is tested in turn; as soon as one spans more than one pixel
// in either axis the pointers are rotated so that edge is lp1->lp2, its
// midpoint is built on the stack, optionally plotted, and the two halves
// are drawn recursively.  When no edge is that long the routine returns
// without drawing.
//
// Register roles once the arguments are loaded:
//		%ebx = lp1		%esi = lp2		%edi = lp3

#define lp1	4+16
#define lp2	8+16
#define lp3	12+16

.globl C(D_PolysetRecursiveTriangle)
C(D_PolysetRecursiveTriangle):
	pushl	%ebp				// preserve caller stack frame pointer
	pushl	%esi				// preserve register variables
	pushl	%edi
	pushl	%ebx

//	int		*temp;
//	int		d;
//	int		new[6];
//	int		i;
//	int		z;
//	short	*zbuf;
	movl	lp2(%esp),%esi
	movl	lp1(%esp),%ebx
	movl	lp3(%esp),%edi

// Every range test below uses the same trick: (d + 1) compared unsigned
// against 2 is "above" exactly when d < -1 or d > 1.

//	d = lp2[0] - lp1[0];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%esi),%eax

	movl	0(%ebx),%edx
	movl	4(%esi),%ebp

	subl	%edx,%eax
	movl	4(%ebx),%ecx

	subl	%ecx,%ebp			// lp2[1] - lp1[1], tested a few lines down
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit

//	d = lp2[1] - lp1[1];
//	if (d < -1 || d > 1)
//		goto split;
	movl	0(%edi),%eax		// early load of lp3[0] for the next test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit

//	d = lp3[0] - lp2[0];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%esi),%edx
	movl	4(%edi),%ebp

	subl	%edx,%eax
	movl	4(%esi),%ecx

	subl	%ecx,%ebp			// lp3[1] - lp2[1]
	incl	%eax

	cmpl	$2,%eax
	ja		LSplit2

//	d = lp3[1] - lp2[1];
//	if (d < -1 || d > 1)
//		goto split2;
	movl	0(%ebx),%eax		// early load of lp1[0] for the next test
	incl	%ebp

	cmpl	$2,%ebp
	ja		LSplit2

//	d = lp1[0] - lp3[0];
//	if (d < -1 || d > 1)
//		goto split3;
	movl	0(%edi),%edx
	movl	4(%ebx),%ebp

	subl	%edx,%eax
	movl	4(%edi),%ecx

	subl	%ecx,%ebp			// lp1[1] - lp3[1]
	incl	%eax

	incl	%ebp
	movl	%ebx,%edx			// temp = lp1, needed by LSplit3

	cmpl	$2,%eax
	ja		LSplit3

//	d = lp1[1] - lp3[1];
//	if (d < -1 || d > 1)
//		goto split3;
//
//	return;			// entire tri is filled
//
	cmpl	$2,%ebp
	jna		LDone

//split3:
// Rotate so that the lp3->lp1 edge becomes lp1->lp2.  What the moves do:
//		temp = lp1;		(already in %edx)
//		lp1 = lp3;
//		lp3 = lp2;
//		lp2 = temp;
LSplit3:
	movl	%edi,%ebx
	movl	%esi,%edi
	movl	%edx,%esi
	jmp		LSplit

//split2:
// Rotate so that the lp2->lp3 edge becomes lp1->lp2.
LSplit2:

//	temp = lp1;
//	lp1 = lp2;
//	lp2 = lp3;
//	lp3 = temp;
	movl	%ebx,%eax
	movl	%esi,%ebx
	movl	%edi,%esi
	movl	%eax,%edi

//split:
LSplit:

	subl	$24,%esp		// allocate space for a new vertex

//// split this edge
//	new[0] = (lp1[0] + lp2[0]) >> 1;
//	new[1] = (lp1[1] + lp2[1]) >> 1;
//	new[2] = (lp1[2] + lp2[2]) >> 1;
//	new[3] = (lp1[3] + lp2[3]) >> 1;
//	new[5] = (lp1[5] + lp2[5]) >> 1;
//
// new[4] is left unwritten.  The five averages are interleaved; the
// comments name the field each instruction belongs to.
	movl	8(%ebx),%eax		// [2]

	movl	8(%esi),%edx		// [2]
	movl	12(%ebx),%ecx		// [3]

	addl	%edx,%eax			// [2]
	movl	12(%esi),%edx		// [3]

	sarl	$1,%eax				// [2]
	addl	%edx,%ecx			// [3]

	movl	%eax,8(%esp)		// new[2]
	movl	20(%ebx),%eax		// [5]

	sarl	$1,%ecx				// [3]
	movl	20(%esi),%edx		// [5]

	movl	%ecx,12(%esp)		// new[3]
	addl	%edx,%eax			// [5]

	movl	0(%ebx),%ecx		// [0]; %ecx keeps lp1[0] for the test below
	movl	0(%esi),%edx		// [0]

	sarl	$1,%eax				// [5]
	addl	%ecx,%edx			// [0]

	movl	%eax,20(%esp)		// new[5]
	movl	4(%ebx),%eax		// [1]; %eax keeps lp1[1] for the test below

	sarl	$1,%edx				// [0]
	movl	4(%esi),%ebp		// [1]

	movl	%edx,0(%esp)		// new[0]
	addl	%eax,%ebp			// [1]

	sarl	$1,%ebp				// [1]
	movl	%ebp,4(%esp)		// new[1]

//// draw the point if splitting a leading edge
//	if (lp2[1] > lp1[1])
//		goto nodraw;
	cmpl	%eax,4(%esi)
	jg		LNoDraw

//	if ((lp2[1] == lp1[1]) && (lp2[0] < lp1[0]))
//		goto nodraw;
	movl	0(%esi),%edx		// mov leaves the flags from the cmpl intact
	jnz		LDraw				// rows differ: draw

	cmpl	%ecx,%edx
	jl		LNoDraw

LDraw:

// z = new[5] >> 16;
	movl	20(%esp),%edx
	movl	4(%esp),%ecx

	sarl	$16,%edx
	movl	0(%esp),%ebp

// zbuf = zspantable[new[1]] + new[0];
	movl	C(zspantable)(,%ecx,4),%eax

// if (z >= *zbuf)
// {
	cmpw	(%eax,%ebp,2),%dx
	jnge	LNoDraw

//		int		pix;
//
//		*zbuf = z;
	movw	%dx,(%eax,%ebp,2)

//		pix = d_pcolormap[skintable[new[3]>>16][new[2]>>16]];
	movl	12(%esp),%eax

	sarl	$16,%eax
	movl	8(%esp),%edx

	sarl	$16,%edx
	subl	%ecx,%ecx			// clear %ecx so %cl can be used as an index

	movl	C(skintable)(,%eax,4),%eax
	movl	4(%esp),%ebp

	movb	(%eax,%edx,),%cl	// skin texel
	movl	C(d_pcolormap),%edx

	movb	(%edx,%ecx,),%dl	// pix
	movl	0(%esp),%ecx

//		d_viewbuffer[d_scantable[new[1]] + new[0]] = pix;
	movl	C(d_scantable)(,%ebp,4),%eax
	addl	%eax,%ecx
	movl	C(d_viewbuffer),%eax
	movb	%dl,(%eax,%ecx,1)

// }
//
//nodraw:
LNoDraw:

//// recursively continue
// Arguments are pushed last-to-first and popped by the callee.
// D_PolysetRecursiveTriangle (lp3, lp1, new);
	pushl	%esp				// new lives at the current top of stack
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)

// D_PolysetRecursiveTriangle (lp3, new, lp2);
	movl	%esp,%ebx			// lp1 is no longer needed; reuse %ebx for new
	pushl	%esi
	pushl	%ebx
	pushl	%edi
	call	C(D_PolysetRecursiveTriangle)
	addl	$24,%esp			// release the new vertex

LDone:
	popl	%ebx				// restore register variables
	popl	%edi
	popl	%esi
	popl	%ebp				// restore caller stack frame pointer
	ret		$12					// callee removes the three arguments


//----------------------------------------------------------------------
// 8-bpp horizontal span drawing code for affine polygons, with smooth
// shading and no transparency
//----------------------------------------------------------------------
//
// D_PolysetDrawSpans8 (pspans)
//
// In:	one stack argument, a pointer to an array of spanpackage_t; the
//		array ends at the entry whose count field is -999999.
//		Also reads r_zistepx, a_ststepxwhole, a_tstepxfrac, a_sstepxfrac,
//		r_lstepx, r_affinetridesc.skinwidth and the edge-stepping state
//		(d_aspancount, errorterm, erroradjustup, erroradjustdown,
//		d_countextrastep, ubasestep).
// Out:	pixels and z values stored through each span's pdest / pz;
//		d_aspancount and errorterm are advanced once per span.
// Uses:	%eax, %ecx, %edx, flags; %esi, %ebx, %ebp, %edi are preserved.
//		Writes lzistepx, advancetable and tstep.
//
// The colormap loads (movb 0x12345678(%eax),%al) carry a placeholder
// address that D_Aff8Patch overwrites, so this code is modified at run
// time.  D_PolysetAff8Start / D_PolysetAff8End bracket the patched range.
// NOTE(review): presumably those two labels exist so the range can be
// made writable - confirm against the callers.

#define pspans	4+8

.globl C(D_PolysetAff8Start)
C(D_PolysetAff8Start):

.globl C(D_PolysetDrawSpans8)
C(D_PolysetDrawSpans8):
	pushl	%esi				// preserve register variables
	pushl	%ebx

	movl	pspans(%esp),%esi	// point to the first span descriptor
	movl	C(r_zistepx),%ecx

	pushl	%ebp				// preserve caller's stack frame
	pushl	%edi

	rorl	$16,%ecx			// put high 16 bits of 1/z step in low word
	movl	spanpackage_t_count(%esi),%edx

	movl	%ecx,lzistepx

LSpanLoop:

// On entry here %edx = pspanpackage->count, %esi = pspanpackage.
//
//		lcount = d_aspancount - pspanpackage->count;
//
//		errorterm += erroradjustup;
//		if (errorterm >= 0)
//		{
//			d_aspancount += d_countextrastep;
//			errorterm -= erroradjustdown;
//		}
//		else
//		{
//			d_aspancount += ubasestep;
//		}
	movl	C(d_aspancount),%eax
	subl	%edx,%eax			// %eax = lcount

	movl	C(erroradjustup),%edx
	movl	C(errorterm),%ebx
	addl	%edx,%ebx
	js		LNoTurnover

	movl	C(erroradjustdown),%edx
	movl	C(d_countextrastep),%edi
	subl	%edx,%ebx
	movl	C(d_aspancount),%ebp
	movl	%ebx,C(errorterm)
	addl	%edi,%ebp
	movl	%ebp,C(d_aspancount)
	jmp		LRightEdgeStepped

LNoTurnover:
	movl	C(d_aspancount),%edi
	movl	C(ubasestep),%edx
	movl	%ebx,C(errorterm)
	addl	%edx,%edi
	movl	%edi,C(d_aspancount)

LRightEdgeStepped:
	cmpl	$1,%eax

	jl		LNextSpan			// nothing to draw on this scan
	jz		LExactlyOneLong		// single pixel: skip the loop setup

//
// set up advancetable
//
// advancetable+4 is used when the t fraction does not carry,
// advancetable when it does (one extra skin row).
//
	movl	C(a_ststepxwhole),%ecx
	movl	C(r_affinetridesc)+atd_skinwidth,%edx

	movl	%ecx,advancetable+4	// advance base in t
	addl	%edx,%ecx

	movl	%ecx,advancetable	// advance extra in t
	movl	C(a_tstepxfrac),%ecx

	movw	C(r_lstepx),%cx		// tstep = t fraction step : light step
	movl	%eax,%edx			// count

	movl	%ecx,tstep
	addl	$7,%edx

	shrl	$3,%edx				// count of full and partial loops
	movl	spanpackage_t_sfrac(%esi),%ebx

	movw	%dx,%bx				// loop count shares %ebx with sfrac
	movl	spanpackage_t_pz(%esi),%ecx

	negl	%eax

	movl	spanpackage_t_pdest(%esi),%edi
	andl	$7,%eax		// 0->0, 1->7, 2->6, ... , 7->1

// Entering the unrolled loop part-way skips %eax pixel slots, so both
// pointers are moved back by that many elements.
	subl	%eax,%edi	// compensate for hardwired offsets
	subl	%eax,%ecx

	subl	%eax,%ecx			// second subtract: z entries are 2 bytes
	movl	spanpackage_t_tfrac(%esi),%edx

	movw	spanpackage_t_light(%esi),%dx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
	pushl	%esi				// %esi is about to become the texel pointer

	movl	spanpackage_t_ptex(%esi),%esi
	jmp		aff8entryvec_table(,%eax,4)

// Register and memory layout inside the pixel loop:
//
// %bx				= count of full and partial loops
// %ebx high word	= sfrac
// %ecx				= pz
// %dx				= light
// %edx high word	= tfrac
// %esi				= ptex
// %edi				= pdest
// %ebp				= 1/z, rotated so its high 16 bits sit in %bp
// tstep low word	= C(r_lstepx)
// tstep high word	= C(a_tstepxfrac)
// C(a_sstepxfrac)	= s fraction step in the high word, low word zero
//					  (it is stored as r_sstepx << 16), so adding it to
//					  %ebx leaves the loop count in %bx untouched
//
// Each of the eight pixel slots does the same thing:
//	- skip the store when the new 1/z (%bp) is less than the stored one
//	- otherwise index the patched colormap with light high byte : texel,
//	  store the new 1/z, store the pixel
//	- step t and light with one add; sbbl turns the t carry into 0 or -1
//	- step 1/z; the adcl $0 feeds the carry back into the rotated value
//	- step s, then advance ptex by advancetable+4 (no t carry) or
//	  advancetable (t carry), plus the s carry
// The add/sbb/add/adc order matters because the carry flag links them.

LDrawLoop:

// FIXME: do we need to clamp light? We may need at least a buffer bit to
// keep it from poking into tfrac and causing problems

LDraw8:
	cmpw	(%ecx),%bp
	jl		Lp1
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch8:
	movb	%al,(%edi)
Lp1:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw7:
	cmpw	2(%ecx),%bp
	jl		Lp2
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,2(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch7:
	movb	%al,1(%edi)
Lp2:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw6:
	cmpw	4(%ecx),%bp
	jl		Lp3
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,4(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch6:
	movb	%al,2(%edi)
Lp3:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw5:
	cmpw	6(%ecx),%bp
	jl		Lp4
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,6(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch5:
	movb	%al,3(%edi)
Lp4:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw4:
	cmpw	8(%ecx),%bp
	jl		Lp5
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,8(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch4:
	movb	%al,4(%edi)
Lp5:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw3:
	cmpw	10(%ecx),%bp
	jl		Lp6
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,10(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch3:
	movb	%al,5(%edi)
Lp6:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw2:
	cmpw	12(%ecx),%bp
	jl		Lp7
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,12(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch2:
	movb	%al,6(%edi)
Lp7:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

LDraw1:
	cmpw	14(%ecx),%bp
	jl		Lp8
	xorl	%eax,%eax
	movb	%dh,%ah
	movb	(%esi),%al
	movw	%bp,14(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch1:
	movb	%al,7(%edi)
Lp8:
	addl	tstep,%edx
	sbbl	%eax,%eax
	addl	lzistepx,%ebp
	adcl	$0,%ebp
	addl	C(a_sstepxfrac),%ebx
	adcl	advancetable+4(,%eax,4),%esi

	addl	$8,%edi				// next group of eight pixels
	addl	$16,%ecx			// and their eight z entries

	decw	%bx
	jnz		LDrawLoop

	popl	%esi				// restore spans pointer
LNextSpan:
	addl	$(spanpackage_t_size),%esi	// point to next span
LNextSpanESISet:
	movl	spanpackage_t_count(%esi),%edx
	cmpl	$-999999,%edx		// any more spans?
	jnz		LSpanLoop			// yes

	popl	%edi
	popl	%ebp				// restore the caller's stack frame
	popl	%ebx				// restore register variables
	popl	%esi
	ret


// draw a one-long span
//
// Same depth test and colormap lookup as one slot of the loop above, but
// nothing is stepped.  On the drawing path %esi is advanced inline, hence
// the exit through LNextSpanESISet; the rejected path exits through
// LNextSpan, which advances it.

LExactlyOneLong:

	movl	spanpackage_t_pz(%esi),%ecx
	movl	spanpackage_t_zi(%esi),%ebp

	rorl	$16,%ebp	// put high 16 bits of 1/z in low word
	movl	spanpackage_t_ptex(%esi),%ebx

	cmpw	(%ecx),%bp
	jl		LNextSpan
	xorl	%eax,%eax
	movl	spanpackage_t_pdest(%esi),%edi
	movb	spanpackage_t_light+1(%esi),%ah	// high byte of light
	addl	$(spanpackage_t_size),%esi	// point to next span
	movb	(%ebx),%al
	movw	%bp,(%ecx)
	movb	0x12345678(%eax),%al	// address patched by D_Aff8Patch
LPatch9:
	movb	%al,(%edi)

	jmp		LNextSpanESISet

.globl C(D_PolysetAff8End)
C(D_PolysetAff8End):


//----------------------------------------------------------------------
// D_Aff8Patch (pcolormap)
//
// In:	4(%esp) = address to use as the colormap base.
// Stores that address into the 32-bit displacement of each of the nine
// colormap loads in the span code.  Every LPatchN label directly follows
// its load, so the displacement is the four bytes before the label.
// Uses:	%eax.
//----------------------------------------------------------------------

#define pcolormap		4

.globl C(D_Aff8Patch)
C(D_Aff8Patch):
	movl	pcolormap(%esp),%eax
	movl	%eax,LPatch1-4
	movl	%eax,LPatch2-4
	movl	%eax,LPatch3-4
	movl	%eax,LPatch4-4
	movl	%eax,LPatch5-4
	movl	%eax,LPatch6-4
	movl	%eax,LPatch7-4
	movl	%eax,LPatch8-4
	movl	%eax,LPatch9-4

	ret


//----------------------------------------------------------------------
// Alias model polygon dispatching code, combined with subdivided affine
// triangle drawing code
//----------------------------------------------------------------------

.globl C(D_PolysetDraw)
C(D_PolysetDraw):

//	spanpackage_t	spans[DPS_MAXSPANS + 1 +
//			((CACHE_SIZE - 1) / sizeof(spanpackage_t)) + 1];
//						// one extra because of cache line pretouching
//
//	a_spans = (spanpackage_t *)
//			(((long)&spans[0] + CACHE_SIZE - 1) & ~(CACHE_SIZE - 1));
	subl	$(SPAN_SIZE),%esp
	movl	%esp,%eax
	addl	$(CACHE_SIZE - 1),%eax
	andl	$(~(CACHE_SIZE - 1)),%eax
	movl	%eax,C(a_spans)

//	if (r_affinetridesc.drawtype)
//		D_DrawSubdiv ();
1103 // else 1104 // D_DrawNonSubdiv (); 1105 movl C(r_affinetridesc)+atd_drawtype,%eax 1106 testl %eax,%eax 1107 jz C(D_DrawNonSubdiv) 1108 1109 pushl %ebp // preserve caller stack frame pointer 1110 1111 // lnumtriangles = r_affinetridesc.numtriangles; 1112 movl C(r_affinetridesc)+atd_numtriangles,%ebp 1113 1114 pushl %esi // preserve register variables 1115 shll $4,%ebp 1116 1117 pushl %ebx 1118 // ptri = r_affinetridesc.ptriangles; 1119 movl C(r_affinetridesc)+atd_ptriangles,%ebx 1120 1121 pushl %edi 1122 1123 // mtriangle_t *ptri; 1124 // finalvert_t *pfv, *index0, *index1, *index2; 1125 // int i; 1126 // int lnumtriangles; 1127 // int s0, s1, s2; 1128 1129 // pfv = r_affinetridesc.pfinalverts; 1130 movl C(r_affinetridesc)+atd_pfinalverts,%edi 1131 1132 // for (i=0 ; i<lnumtriangles ; i++) 1133 // { 1134 1135 Llooptop: 1136 1137 // index0 = pfv + ptri[i].vertindex[0]; 1138 // index1 = pfv + ptri[i].vertindex[1]; 1139 // index2 = pfv + ptri[i].vertindex[2]; 1140 movl mtri_vertindex-16+0(%ebx,%ebp,),%ecx 1141 movl mtri_vertindex-16+4(%ebx,%ebp,),%esi 1142 1143 shll $(fv_shift),%ecx 1144 movl mtri_vertindex-16+8(%ebx,%ebp,),%edx 1145 1146 shll $(fv_shift),%esi 1147 addl %edi,%ecx 1148 1149 shll $(fv_shift),%edx 1150 addl %edi,%esi 1151 1152 addl %edi,%edx 1153 1154 // if (((index0->v[1]-index1->v[1]) * 1155 // (index0->v[0]-index2->v[0]) - 1156 // (index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1])) >= 0) 1157 // { 1158 // continue; 1159 // } 1160 // 1161 // d_pcolormap = &((byte *)acolormap)[index0->v[4] & 0xFF00]; 1162 fildl fv_v+4(%ecx) // i0v1 1163 fildl fv_v+4(%esi) // i1v1 | i0v1 1164 fildl fv_v+0(%ecx) // i0v0 | i1v1 | i0v1 1165 fildl fv_v+0(%edx) // i2v0 | i0v0 | i1v1 | i0v1 1166 fxch %st(2) // i1v1 | i0v0 | i2v0 | i0v1 1167 fsubr %st(3),%st(0) // i0v1-i1v1 | i0v0 | i2v0 | i0v1 1168 fildl fv_v+0(%esi) // i1v0 | i0v1-i1v1 | i0v0 | i2v0 | i0v1 1169 fxch %st(2) // i0v0 | i0v1-i1v1 | i1v0 | i2v0 | i0v1 1170 fsub %st(0),%st(3) // i0v0 | i0v1-i1v1 | i1v0 | 
i0v0-i2v0 | i0v1 1171 fildl fv_v+4(%edx) // i2v1 | i0v0 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 1172 fxch %st(1) // i0v0 | i2v1 | i0v1-i1v1 | i1v0 | i0v0-i2v0| i0v1 1173 fsubp %st(0),%st(3) // i2v1 | i0v1-i1v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 1174 fxch %st(1) // i0v1-i1v1 | i2v1 | i0v0-i1v0 | i0v0-i2v0 | i0v1 1175 fmulp %st(0),%st(3) // i2v1 | i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1 1176 fsubrp %st(0),%st(3) // i0v0-i1v0 | i0v1-i1v1*i0v0-i2v0 | i0v1-i2v1 1177 movl fv_v+16(%ecx),%eax 1178 andl $0xFF00,%eax 1179 fmulp %st(0),%st(2) // i0v1-i1v1*i0v0-i2v0 | i0v0-i1v0*i0v1-i2v1 1180 addl C(acolormap),%eax 1181 fsubp %st(0),%st(1) // (i0v1-i1v1)*(i0v0-i2v0)-(i0v0-i1v0)*(i0v1-i2v1) 1182 movl %eax,C(d_pcolormap) 1183 fstps Ltemp 1184 movl Ltemp,%eax 1185 subl $0x80000001,%eax 1186 jc Lskip 1187 1188 // if (ptri[i].facesfront) 1189 // { 1190 // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); 1191 movl mtri_facesfront-16(%ebx,%ebp,),%eax 1192 testl %eax,%eax 1193 jz Lfacesback 1194 1195 pushl %edx 1196 pushl %esi 1197 pushl %ecx 1198 call C(D_PolysetRecursiveTriangle) 1199 1200 subl $16,%ebp 1201 jnz Llooptop 1202 jmp Ldone2 1203 1204 // } 1205 // else 1206 // { 1207 Lfacesback: 1208 1209 // s0 = index0->v[2]; 1210 // s1 = index1->v[2]; 1211 // s2 = index2->v[2]; 1212 movl fv_v+8(%ecx),%eax 1213 pushl %eax 1214 movl fv_v+8(%esi),%eax 1215 pushl %eax 1216 movl fv_v+8(%edx),%eax 1217 pushl %eax 1218 pushl %ecx 1219 pushl %edx 1220 1221 // if (index0->flags & ALIAS_ONSEAM) 1222 // index0->v[2] += r_affinetridesc.seamfixupX16; 1223 movl C(r_affinetridesc)+atd_seamfixupX16,%eax 1224 testl $(ALIAS_ONSEAM),fv_flags(%ecx) 1225 jz Lp11 1226 addl %eax,fv_v+8(%ecx) 1227 Lp11: 1228 1229 // if (index1->flags & ALIAS_ONSEAM) 1230 // index1->v[2] += r_affinetridesc.seamfixupX16; 1231 testl $(ALIAS_ONSEAM),fv_flags(%esi) 1232 jz Lp12 1233 addl %eax,fv_v+8(%esi) 1234 Lp12: 1235 1236 // if (index2->flags & ALIAS_ONSEAM) 1237 // index2->v[2] += r_affinetridesc.seamfixupX16; 1238 testl 
$(ALIAS_ONSEAM),fv_flags(%edx) 1239 jz Lp13 1240 addl %eax,fv_v+8(%edx) 1241 Lp13: 1242 1243 // D_PolysetRecursiveTriangle(index0->v, index1->v, index2->v); 1244 pushl %edx 1245 pushl %esi 1246 pushl %ecx 1247 call C(D_PolysetRecursiveTriangle) 1248 1249 // index0->v[2] = s0; 1250 // index1->v[2] = s1; 1251 // index2->v[2] = s2; 1252 popl %edx 1253 popl %ecx 1254 popl %eax 1255 movl %eax,fv_v+8(%edx) 1256 popl %eax 1257 movl %eax,fv_v+8(%esi) 1258 popl %eax 1259 movl %eax,fv_v+8(%ecx) 1260 1261 // } 1262 // } 1263 Lskip: 1264 subl $16,%ebp 1265 jnz Llooptop 1266 1267 Ldone2: 1268 popl %edi // restore the caller's stack frame 1269 popl %ebx 1270 popl %esi // restore register variables 1271 popl %ebp 1272 1273 addl $(SPAN_SIZE),%esp 1274 1275 ret 1276 1277 1278 //---------------------------------------------------------------------- 1279 // Alias model triangle left-edge scanning code 1280 //---------------------------------------------------------------------- 1281 1282 #define height 4+16 1283 1284 .globl C(D_PolysetScanLeftEdge) 1285 C(D_PolysetScanLeftEdge): 1286 pushl %ebp // preserve caller stack frame pointer 1287 pushl %esi // preserve register variables 1288 pushl %edi 1289 pushl %ebx 1290 1291 movl height(%esp),%eax 1292 movl C(d_sfrac),%ecx 1293 andl $0xFFFF,%eax 1294 movl C(d_ptex),%ebx 1295 orl %eax,%ecx 1296 movl C(d_pedgespanpackage),%esi 1297 movl C(d_tfrac),%edx 1298 movl C(d_light),%edi 1299 movl C(d_zi),%ebp 1300 1301 // %eax: scratch 1302 // %ebx: d_ptex 1303 // %ecx: d_sfrac in high word, count in low word 1304 // %edx: d_tfrac 1305 // %esi: d_pedgespanpackage, errorterm, scratch alternately 1306 // %edi: d_light 1307 // %ebp: d_zi 1308 1309 // do 1310 // { 1311 1312 LScanLoop: 1313 1314 // d_pedgespanpackage->ptex = ptex; 1315 // d_pedgespanpackage->pdest = d_pdest; 1316 // d_pedgespanpackage->pz = d_pz; 1317 // d_pedgespanpackage->count = d_aspancount; 1318 // d_pedgespanpackage->light = d_light; 1319 // d_pedgespanpackage->zi = d_zi; 1320 // 
d_pedgespanpackage->sfrac = d_sfrac << 16; 1321 // d_pedgespanpackage->tfrac = d_tfrac << 16; 1322 movl %ebx,spanpackage_t_ptex(%esi) 1323 movl C(d_pdest),%eax 1324 movl %eax,spanpackage_t_pdest(%esi) 1325 movl C(d_pz),%eax 1326 movl %eax,spanpackage_t_pz(%esi) 1327 movl C(d_aspancount),%eax 1328 movl %eax,spanpackage_t_count(%esi) 1329 movl %edi,spanpackage_t_light(%esi) 1330 movl %ebp,spanpackage_t_zi(%esi) 1331 movl %ecx,spanpackage_t_sfrac(%esi) 1332 movl %edx,spanpackage_t_tfrac(%esi) 1333 1334 // pretouch the next cache line 1335 movb spanpackage_t_size(%esi),%al 1336 1337 // d_pedgespanpackage++; 1338 addl $(spanpackage_t_size),%esi 1339 movl C(erroradjustup),%eax 1340 movl %esi,C(d_pedgespanpackage) 1341 1342 // errorterm += erroradjustup; 1343 movl C(errorterm),%esi 1344 addl %eax,%esi 1345 movl C(d_pdest),%eax 1346 1347 // if (errorterm >= 0) 1348 // { 1349 js LNoLeftEdgeTurnover 1350 1351 // errorterm -= erroradjustdown; 1352 // d_pdest += d_pdestextrastep; 1353 subl C(erroradjustdown),%esi 1354 addl C(d_pdestextrastep),%eax 1355 movl %esi,C(errorterm) 1356 movl %eax,C(d_pdest) 1357 1358 // d_pz += d_pzextrastep; 1359 // d_aspancount += d_countextrastep; 1360 // d_ptex += d_ptexextrastep; 1361 // d_sfrac += d_sfracextrastep; 1362 // d_ptex += d_sfrac >> 16; 1363 // d_sfrac &= 0xFFFF; 1364 // d_tfrac += d_tfracextrastep; 1365 movl C(d_pz),%eax 1366 movl C(d_aspancount),%esi 1367 addl C(d_pzextrastep),%eax 1368 addl C(d_sfracextrastep),%ecx 1369 adcl C(d_ptexextrastep),%ebx 1370 addl C(d_countextrastep),%esi 1371 movl %eax,C(d_pz) 1372 movl C(d_tfracextrastep),%eax 1373 movl %esi,C(d_aspancount) 1374 addl %eax,%edx 1375 1376 // if (d_tfrac & 0x10000) 1377 // { 1378 jnc LSkip1 1379 1380 // d_ptex += r_affinetridesc.skinwidth; 1381 // d_tfrac &= 0xFFFF; 1382 addl C(r_affinetridesc)+atd_skinwidth,%ebx 1383 1384 // } 1385 1386 LSkip1: 1387 1388 // d_light += d_lightextrastep; 1389 // d_zi += d_ziextrastep; 1390 addl C(d_lightextrastep),%edi 1391 addl 
C(d_ziextrastep),%ebp 1392 1393 // } 1394 movl C(d_pedgespanpackage),%esi 1395 decl %ecx 1396 testl $0xFFFF,%ecx 1397 jnz LScanLoop 1398 1399 popl %ebx 1400 popl %edi 1401 popl %esi 1402 popl %ebp 1403 ret 1404 1405 // else 1406 // { 1407 1408 LNoLeftEdgeTurnover: 1409 movl %esi,C(errorterm) 1410 1411 // d_pdest += d_pdestbasestep; 1412 addl C(d_pdestbasestep),%eax 1413 movl %eax,C(d_pdest) 1414 1415 // d_pz += d_pzbasestep; 1416 // d_aspancount += ubasestep; 1417 // d_ptex += d_ptexbasestep; 1418 // d_sfrac += d_sfracbasestep; 1419 // d_ptex += d_sfrac >> 16; 1420 // d_sfrac &= 0xFFFF; 1421 movl C(d_pz),%eax 1422 movl C(d_aspancount),%esi 1423 addl C(d_pzbasestep),%eax 1424 addl C(d_sfracbasestep),%ecx 1425 adcl C(d_ptexbasestep),%ebx 1426 addl C(ubasestep),%esi 1427 movl %eax,C(d_pz) 1428 movl %esi,C(d_aspancount) 1429 1430 // d_tfrac += d_tfracbasestep; 1431 movl C(d_tfracbasestep),%esi 1432 addl %esi,%edx 1433 1434 // if (d_tfrac & 0x10000) 1435 // { 1436 jnc LSkip2 1437 1438 // d_ptex += r_affinetridesc.skinwidth; 1439 // d_tfrac &= 0xFFFF; 1440 addl C(r_affinetridesc)+atd_skinwidth,%ebx 1441 1442 // } 1443 1444 LSkip2: 1445 1446 // d_light += d_lightbasestep; 1447 // d_zi += d_zibasestep; 1448 addl C(d_lightbasestep),%edi 1449 addl C(d_zibasestep),%ebp 1450 1451 // } 1452 // } while (--height); 1453 movl C(d_pedgespanpackage),%esi 1454 decl %ecx 1455 testl $0xFFFF,%ecx 1456 jnz LScanLoop 1457 1458 popl %ebx 1459 popl %edi 1460 popl %esi 1461 popl %ebp 1462 ret 1463 1464 1465 //---------------------------------------------------------------------- 1466 // Alias model vertex drawing code 1467 //---------------------------------------------------------------------- 1468 1469 #define fv 4+8 1470 #define numverts 8+8 1471 1472 .globl C(D_PolysetDrawFinalVerts) 1473 C(D_PolysetDrawFinalVerts): 1474 pushl %ebp // preserve caller stack frame pointer 1475 pushl %ebx 1476 1477 // int i, z; 1478 // short *zbuf; 1479 1480 movl numverts(%esp),%ecx 1481 movl fv(%esp),%ebx 
1482 1483 pushl %esi // preserve register variables 1484 pushl %edi 1485 1486 LFVLoop: 1487 1488 // for (i=0 ; i<numverts ; i++, fv++) 1489 // { 1490 // // valid triangle coordinates for filling can include the bottom and 1491 // // right clip edges, due to the fill rule; these shouldn't be drawn 1492 // if ((fv->v[0] < r_refdef.vrectright) && 1493 // (fv->v[1] < r_refdef.vrectbottom)) 1494 // { 1495 movl fv_v+0(%ebx),%eax 1496 movl C(r_refdef)+rd_vrectright,%edx 1497 cmpl %edx,%eax 1498 jge LNextVert 1499 movl fv_v+4(%ebx),%esi 1500 movl C(r_refdef)+rd_vrectbottom,%edx 1501 cmpl %edx,%esi 1502 jge LNextVert 1503 1504 // zbuf = zspantable[fv->v[1]] + fv->v[0]; 1505 movl C(zspantable)(,%esi,4),%edi 1506 1507 // z = fv->v[5]>>16; 1508 movl fv_v+20(%ebx),%edx 1509 shrl $16,%edx 1510 1511 // if (z >= *zbuf) 1512 // { 1513 // int pix; 1514 cmpw (%edi,%eax,2),%dx 1515 jl LNextVert 1516 1517 // *zbuf = z; 1518 movw %dx,(%edi,%eax,2) 1519 1520 // pix = skintable[fv->v[3]>>16][fv->v[2]>>16]; 1521 movl fv_v+12(%ebx),%edi 1522 shrl $16,%edi 1523 movl C(skintable)(,%edi,4),%edi 1524 movl fv_v+8(%ebx),%edx 1525 shrl $16,%edx 1526 movb (%edi,%edx),%dl 1527 1528 // pix = ((byte *)acolormap)[pix + (fv->v[4] & 0xFF00)]; 1529 movl fv_v+16(%ebx),%edi 1530 andl $0xFF00,%edi 1531 andl $0x00FF,%edx 1532 addl %edx,%edi 1533 movl C(acolormap),%edx 1534 movb (%edx,%edi,1),%dl 1535 1536 // d_viewbuffer[d_scantable[fv->v[1]] + fv->v[0]] = pix; 1537 movl C(d_scantable)(,%esi,4),%edi 1538 movl C(d_viewbuffer),%esi 1539 addl %eax,%edi 1540 movb %dl,(%esi,%edi) 1541 1542 // } 1543 // } 1544 // } 1545 LNextVert: 1546 addl $(fv_size),%ebx 1547 decl %ecx 1548 jnz LFVLoop 1549 1550 popl %edi 1551 popl %esi 1552 popl %ebx 1553 popl %ebp 1554 ret 1555 1556 1557 //---------------------------------------------------------------------- 1558 // Alias model non-subdivided polygon dispatching code 1559 // 1560 // not C-callable because of stack buffer cleanup 1561 
//----------------------------------------------------------------------

// On exit this routine adds SPAN_SIZE back to the stack pointer without
// having subtracted it, so it must be entered with a SPAN_SIZE-byte
// buffer already reserved directly above the return address.

	.globl C(D_DrawNonSubdiv)
C(D_DrawNonSubdiv):
	pushl	%ebp				// preserve caller stack frame pointer
	movl	C(r_affinetridesc)+atd_numtriangles,%ebp
	pushl	%ebx
// %ebp becomes a byte offset just past the last triangle; the loop walks
// it down to zero in steps of mtri_size
	shll	$(mtri_shift),%ebp
	pushl	%esi				// preserve register variables
	movl	C(r_affinetridesc)+atd_ptriangles,%esi
	pushl	%edi

//	mtriangle_t		*ptri;
//	finalvert_t		*pfv, *index0, *index1, *index2;
//	int				i;
//	int				lnumtriangles;

//	pfv = r_affinetridesc.pfinalverts;
//	ptri = r_affinetridesc.ptriangles;
//	lnumtriangles = r_affinetridesc.numtriangles;

// the reference loop below runs zero times for an empty triangle list;
// the loop here is bottom-tested, so without this check it would read the
// triangle in front of the array and keep going until the offset wraps
	testl	%ebp,%ebp
	jle		LNDDone

LNDLoop:

//	for (i=0 ; i<lnumtriangles ; i++, ptri++)
//	{
//		index0 = pfv + ptri->vertindex[0];
//		index1 = pfv + ptri->vertindex[1];
//		index2 = pfv + ptri->vertindex[2];
	movl	C(r_affinetridesc)+atd_pfinalverts,%edi
	movl	mtri_vertindex+0-mtri_size(%esi,%ebp,1),%ecx
	shll	$(fv_shift),%ecx
	movl	mtri_vertindex+4-mtri_size(%esi,%ebp,1),%edx
	shll	$(fv_shift),%edx
	movl	mtri_vertindex+8-mtri_size(%esi,%ebp,1),%ebx
	shll	$(fv_shift),%ebx
	addl	%edi,%ecx
	addl	%edi,%edx
	addl	%edi,%ebx

//		d_xdenom = (index0->v[1]-index1->v[1]) *
//				(index0->v[0]-index2->v[0]) -
//				(index0->v[0]-index1->v[0])*(index0->v[1]-index2->v[1]);
	movl	fv_v+4(%ecx),%eax
	movl	fv_v+0(%ecx),%esi
	subl	fv_v+4(%edx),%eax
	subl	fv_v+0(%ebx),%esi
	imull	%esi,%eax
	movl	fv_v+0(%ecx),%esi
	movl	fv_v+4(%ecx),%edi
	subl	fv_v+0(%edx),%esi
	subl	fv_v+4(%ebx),%edi
	imull	%esi,%edi
	subl	%edi,%eax

//		if (d_xdenom >= 0)
//		{
//			continue;
	jns		LNextTri			// sign flag comes from the subtract above

//		}

	movl	%eax,C(d_xdenom)
	fildl	C(d_xdenom)

//		r_p0[0] = index0->v[0];		// u
//		r_p0[1] = index0->v[1];		// v
//		r_p0[2] = index0->v[2];		// s
//		r_p0[3] = index0->v[3];		// t
//		r_p0[4] = index0->v[4];		// light
//		r_p0[5] = index0->v[5];		// iz
	movl	fv_v+0(%ecx),%eax
	movl	fv_v+4(%ecx),%esi
	movl	%eax,C(r_p0)+0
	movl	%esi,C(r_p0)+4
	movl	fv_v+8(%ecx),%eax
	movl	fv_v+12(%ecx),%esi
	movl	%eax,C(r_p0)+8
	movl	%esi,C(r_p0)+12
	movl	fv_v+16(%ecx),%eax
	movl	fv_v+20(%ecx),%esi
	movl	%eax,C(r_p0)+16
	movl	%esi,C(r_p0)+20

// float_1 divided by the loaded d_xdenom; the quotient stays on the FPU
// stack and is stored back to d_xdenom at LFacesFront
	fdivrs	float_1

//		r_p1[0] = index1->v[0];
//		r_p1[1] = index1->v[1];
//		r_p1[2] = index1->v[2];
//		r_p1[3] = index1->v[3];
//		r_p1[4] = index1->v[4];
//		r_p1[5] = index1->v[5];
	movl	fv_v+0(%edx),%eax
	movl	fv_v+4(%edx),%esi
	movl	%eax,C(r_p1)+0
	movl	%esi,C(r_p1)+4
	movl	fv_v+8(%edx),%eax
	movl	fv_v+12(%edx),%esi
	movl	%eax,C(r_p1)+8
	movl	%esi,C(r_p1)+12
	movl	fv_v+16(%edx),%eax
	movl	fv_v+20(%edx),%esi
	movl	%eax,C(r_p1)+16
	movl	%esi,C(r_p1)+20

//		r_p2[0] = index2->v[0];
//		r_p2[1] = index2->v[1];
//		r_p2[2] = index2->v[2];
//		r_p2[3] = index2->v[3];
//		r_p2[4] = index2->v[4];
//		r_p2[5] = index2->v[5];
	movl	fv_v+0(%ebx),%eax
	movl	fv_v+4(%ebx),%esi
	movl	%eax,C(r_p2)+0
	movl	%esi,C(r_p2)+4
	movl	fv_v+8(%ebx),%eax
	movl	fv_v+12(%ebx),%esi
	movl	%eax,C(r_p2)+8
	movl	%esi,C(r_p2)+12
	movl	fv_v+16(%ebx),%eax
	movl	fv_v+20(%ebx),%esi
	movl	%eax,C(r_p2)+16
	movl	C(r_affinetridesc)+atd_ptriangles,%edi
	movl	%esi,C(r_p2)+20
	movl	mtri_facesfront-mtri_size(%edi,%ebp,1),%eax

//		if (!ptri->facesfront)
//		{
	testl	%eax,%eax
	jnz		LFacesFront

//			if (index0->flags & ALIAS_ONSEAM)
//				r_p0[2] += r_affinetridesc.seamfixupX16;
	movl	fv_flags(%ecx),%eax
	movl	fv_flags(%edx),%esi
	movl	fv_flags(%ebx),%edi
	testl	$(ALIAS_ONSEAM),%eax
	movl	C(r_affinetridesc)+atd_seamfixupX16,%eax	// mov leaves the flags intact
	jz		LOnseamDone0
	addl	%eax,C(r_p0)+8
LOnseamDone0:

//			if (index1->flags & ALIAS_ONSEAM)
//		 		r_p1[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%esi
	jz		LOnseamDone1
	addl	%eax,C(r_p1)+8
LOnseamDone1:

//			if (index2->flags & ALIAS_ONSEAM)
//				r_p2[2] += r_affinetridesc.seamfixupX16;
	testl	$(ALIAS_ONSEAM),%edi
	jz		LOnseamDone2
	addl	%eax,C(r_p2)+8
LOnseamDone2:

//		}

LFacesFront:

	fstps	C(d_xdenom)

//		D_PolysetSetEdgeTable ();
//		D_RasterizeAliasPolySmooth ();
	call	C(D_PolysetSetEdgeTable)
	call	C(D_RasterizeAliasPolySmooth)

LNextTri:
// %esi was used as scratch in the loop body, so the triangle array base
// is reloaded before the next pass
	movl	C(r_affinetridesc)+atd_ptriangles,%esi
	subl	$(mtri_size),%ebp
	jnz		LNDLoop
//	}

LNDDone:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp

	addl	$(SPAN_SIZE),%esp	// release the span buffer

	ret


#endif	// id386
