1 2 /* FF is big and ugly so feel free to write lines as long as you like. 3 * Aieeeeeeeee ! 4 * 5 * Let me make that clearer: 6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!! 7 */ 8 9 #include "device9.h" 10 #include "basetexture9.h" 11 #include "vertexdeclaration9.h" 12 #include "vertexshader9.h" 13 #include "pixelshader9.h" 14 #include "nine_ff.h" 15 #include "nine_defines.h" 16 #include "nine_helpers.h" 17 #include "nine_pipe.h" 18 #include "nine_dump.h" 19 20 #include "pipe/p_context.h" 21 #include "tgsi/tgsi_ureg.h" 22 #include "tgsi/tgsi_dump.h" 23 #include "util/u_box.h" 24 #include "util/u_hash_table.h" 25 #include "util/u_upload_mgr.h" 26 27 #define DBG_CHANNEL DBG_FF 28 29 #define NINE_FF_NUM_VS_CONST 196 30 #define NINE_FF_NUM_PS_CONST 24 31 32 struct fvec4 33 { 34 float x, y, z, w; 35 }; 36 37 struct nine_ff_vs_key 38 { 39 union { 40 struct { 41 uint32_t position_t : 1; 42 uint32_t lighting : 1; 43 uint32_t darkness : 1; /* lighting enabled but no active lights */ 44 uint32_t localviewer : 1; 45 uint32_t vertexpointsize : 1; 46 uint32_t pointscale : 1; 47 uint32_t vertexblend : 3; 48 uint32_t vertexblend_indexed : 1; 49 uint32_t vertextween : 1; 50 uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */ 51 uint32_t mtl_ambient : 2; 52 uint32_t mtl_specular : 2; 53 uint32_t mtl_emissive : 2; 54 uint32_t fog_mode : 2; 55 uint32_t fog_range : 1; 56 uint32_t color0in_one : 1; 57 uint32_t color1in_zero : 1; 58 uint32_t has_normal : 1; 59 uint32_t fog : 1; 60 uint32_t normalizenormals : 1; 61 uint32_t ucp : 1; 62 uint32_t pad1 : 4; 63 uint32_t tc_dim_input: 16; /* 8 * 2 bits */ 64 uint32_t pad2 : 16; 65 uint32_t tc_dim_output: 24; /* 8 * 3 bits */ 66 uint32_t pad3 : 8; 67 uint32_t tc_gen : 24; /* 8 * 3 bits */ 68 uint32_t pad4 : 8; 69 uint32_t tc_idx : 24; 70 uint32_t pad5 : 8; 71 uint32_t passthrough; 72 }; 73 uint64_t value64[3]; /* don't forget to resize 
VertexShader9.ff_key */ 74 uint32_t value32[6]; 75 }; 76 }; 77 78 /* Texture stage state: 79 * 80 * COLOROP D3DTOP 5 bit 81 * ALPHAOP D3DTOP 5 bit 82 * COLORARG0 D3DTA 3 bit 83 * COLORARG1 D3DTA 3 bit 84 * COLORARG2 D3DTA 3 bit 85 * ALPHAARG0 D3DTA 3 bit 86 * ALPHAARG1 D3DTA 3 bit 87 * ALPHAARG2 D3DTA 3 bit 88 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1) 89 * TEXCOORDINDEX 0 - 7 3 bit 90 * =========================== 91 * 32 bit per stage 92 */ 93 struct nine_ff_ps_key 94 { 95 union { 96 struct { 97 struct { 98 uint32_t colorop : 5; 99 uint32_t alphaop : 5; 100 uint32_t colorarg0 : 3; 101 uint32_t colorarg1 : 3; 102 uint32_t colorarg2 : 3; 103 uint32_t alphaarg0 : 3; 104 uint32_t alphaarg1 : 3; 105 uint32_t alphaarg2 : 3; 106 uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ 107 uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ 108 uint32_t pad : 1; 109 /* that's 32 bit exactly */ 110 } ts[8]; 111 uint32_t projected : 16; 112 uint32_t fog : 1; /* for vFog coming from VS */ 113 uint32_t fog_mode : 2; 114 uint32_t fog_source : 1; /* 0: Z, 1: W */ 115 uint32_t specular : 1; 116 uint32_t pad1 : 11; /* 9 32-bit words with this */ 117 uint8_t colorarg_b4[3]; 118 uint8_t colorarg_b5[3]; 119 uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ 120 uint8_t pad2[3]; 121 }; 122 uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ 123 uint32_t value32[12]; 124 }; 125 }; 126 127 static unsigned nine_ff_vs_key_hash(void *key) 128 { 129 struct nine_ff_vs_key *vs = key; 130 unsigned i; 131 uint32_t hash = vs->value32[0]; 132 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i) 133 hash ^= vs->value32[i]; 134 return hash; 135 } 136 static int nine_ff_vs_key_comp(void *key1, void *key2) 137 { 138 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1; 139 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2; 140 141 return memcmp(a->value64, b->value64, sizeof(a->value64)); 142 } 143 static unsigned nine_ff_ps_key_hash(void *key) 144 { 145 struct nine_ff_ps_key *ps = 
key; 146 unsigned i; 147 uint32_t hash = ps->value32[0]; 148 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i) 149 hash ^= ps->value32[i]; 150 return hash; 151 } 152 static int nine_ff_ps_key_comp(void *key1, void *key2) 153 { 154 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1; 155 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2; 156 157 return memcmp(a->value64, b->value64, sizeof(a->value64)); 158 } 159 static unsigned nine_ff_fvf_key_hash(void *key) 160 { 161 return *(DWORD *)key; 162 } 163 static int nine_ff_fvf_key_comp(void *key1, void *key2) 164 { 165 return *(DWORD *)key1 != *(DWORD *)key2; 166 } 167 168 static void nine_ff_prune_vs(struct NineDevice9 *); 169 static void nine_ff_prune_ps(struct NineDevice9 *); 170 171 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) 172 { 173 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { 174 unsigned count; 175 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count); 176 tgsi_dump(toks, 0); 177 ureg_free_tokens(toks); 178 } 179 } 180 181 #define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X) 182 #define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y) 183 #define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z) 184 #define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W) 185 186 #define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X) 187 #define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y) 188 #define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z) 189 #define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W) 190 191 #define _XYZW(r) (r) 192 193 /* AL should contain base address of lights table. */ 194 #define LIGHT_CONST(i) \ 195 ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL)) 196 197 #define MATERIAL_CONST(i) \ 198 ureg_DECL_constant(ureg, 19 + (i)) 199 200 #define _CONST(n) ureg_DECL_constant(ureg, n) 201 202 /* VS FF constants layout: 203 * 204 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION 205 * CONST[ 4.. 
7] D3DTS_WORLD * D3DTS_VIEW 206 * CONST[ 8..11] D3DTS_PROJECTION 207 * CONST[12..15] D3DTS_VIEW^(-1) 208 * CONST[16..18] Normal matrix 209 * 210 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient 211 * CONST[20] MATERIAL.Diffuse 212 * CONST[21] MATERIAL.Ambient 213 * CONST[22] MATERIAL.Specular 214 * CONST[23].x___ MATERIAL.Power 215 * CONST[24] MATERIAL.Emissive 216 * CONST[25] RS.Ambient 217 * 218 * CONST[26].x___ RS.PointSizeMin 219 * CONST[26]._y__ RS.PointSizeMax 220 * CONST[26].__z_ RS.PointSize 221 * CONST[26].___w RS.PointScaleA 222 * CONST[27].x___ RS.PointScaleB 223 * CONST[27]._y__ RS.PointScaleC 224 * 225 * CONST[28].x___ RS.FogEnd 226 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 227 * CONST[28].__z_ RS.FogDensity 228 229 * CONST[30].x___ TWEENFACTOR 230 * 231 * CONST[32].x___ LIGHT[0].Type 232 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2 233 * CONST[33] LIGHT[0].Diffuse 234 * CONST[34] LIGHT[0].Specular 235 * CONST[35] LIGHT[0].Ambient 236 * CONST[36].xyz_ LIGHT[0].Position 237 * CONST[36].___w LIGHT[0].Range 238 * CONST[37].xyz_ LIGHT[0].Direction 239 * CONST[37].___w LIGHT[0].Falloff 240 * CONST[38].x___ cos(LIGHT[0].Theta / 2) 241 * CONST[38]._y__ cos(LIGHT[0].Phi / 2) 242 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2)) 243 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights) 244 * CONST[39].___w 1 if this is the last active light, 0 if not 245 * CONST[40] LIGHT[1] 246 * CONST[48] LIGHT[2] 247 * CONST[56] LIGHT[3] 248 * CONST[64] LIGHT[4] 249 * CONST[72] LIGHT[5] 250 * CONST[80] LIGHT[6] 251 * CONST[88] LIGHT[7] 252 * NOTE: no lighting code is generated if there are no active lights 253 * 254 * CONST[100].x___ Viewport 2/width 255 * CONST[100]._y__ Viewport 2/height 256 * CONST[100].__z_ Viewport 1/(zmax - zmin) 257 * CONST[100].___w Viewport width 258 * CONST[101].x___ Viewport x0 259 * CONST[101]._y__ Viewport y0 260 * CONST[101].__z_ Viewport z0 261 * 262 * CONST[128..131] D3DTS_TEXTURE0 263 * 
CONST[132..135] D3DTS_TEXTURE1 264 * CONST[136..139] D3DTS_TEXTURE2 265 * CONST[140..143] D3DTS_TEXTURE3 266 * CONST[144..147] D3DTS_TEXTURE4 267 * CONST[148..151] D3DTS_TEXTURE5 268 * CONST[152..155] D3DTS_TEXTURE6 269 * CONST[156..159] D3DTS_TEXTURE7 270 * 271 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW 272 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW 273 * ... 274 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW 275 */ 276 struct vs_build_ctx 277 { 278 struct ureg_program *ureg; 279 const struct nine_ff_vs_key *key; 280 281 uint16_t input[PIPE_MAX_ATTRIBS]; 282 unsigned num_inputs; 283 284 struct ureg_src aVtx; 285 struct ureg_src aNrm; 286 struct ureg_src aCol[2]; 287 struct ureg_src aTex[8]; 288 struct ureg_src aPsz; 289 struct ureg_src aInd; 290 struct ureg_src aWgt; 291 292 struct ureg_src aVtx1; /* tweening */ 293 struct ureg_src aNrm1; 294 295 struct ureg_src mtlA; 296 struct ureg_src mtlD; 297 struct ureg_src mtlS; 298 struct ureg_src mtlE; 299 }; 300 301 static inline unsigned 302 get_texcoord_sn(struct pipe_screen *screen) 303 { 304 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD)) 305 return TGSI_SEMANTIC_TEXCOORD; 306 return TGSI_SEMANTIC_GENERIC; 307 } 308 309 static inline struct ureg_src 310 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) 311 { 312 const unsigned i = vs->num_inputs++; 313 assert(i < PIPE_MAX_ATTRIBS); 314 vs->input[i] = ndecl; 315 return ureg_DECL_vs_input(vs->ureg, i); 316 } 317 318 /* NOTE: dst may alias src */ 319 static inline void 320 ureg_normalize3(struct ureg_program *ureg, 321 struct ureg_dst dst, struct ureg_src src) 322 { 323 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 324 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 325 326 ureg_DP3(ureg, tmp_x, src, src); 327 ureg_RSQ(ureg, tmp_x, _X(tmp)); 328 ureg_MUL(ureg, dst, src, _X(tmp)); 329 ureg_release_temporary(ureg, tmp); 330 } 331 332 static void * 333 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) 334 { 335 
const struct nine_ff_vs_key *key = vs->key; 336 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); 337 struct ureg_dst oPos, oCol[2], oPsz, oFog; 338 struct ureg_dst AR; 339 unsigned i, c; 340 unsigned label[32], l = 0; 341 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); 342 boolean has_aNrm = need_aNrm && key->has_normal; 343 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp; 344 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 345 346 vs->ureg = ureg; 347 348 /* Check which inputs we should transform. */ 349 for (i = 0; i < 8 * 3; i += 3) { 350 switch ((key->tc_gen >> i) & 0x7) { 351 case NINED3DTSS_TCI_CAMERASPACENORMAL: 352 need_aNrm = TRUE; 353 break; 354 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 355 need_aVtx = TRUE; 356 break; 357 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 358 need_aVtx = need_aNrm = TRUE; 359 break; 360 case NINED3DTSS_TCI_SPHEREMAP: 361 need_aVtx = need_aNrm = TRUE; 362 break; 363 default: 364 break; 365 } 366 } 367 368 /* Declare and record used inputs (needed for linkage with vertex format): 369 * (texture coordinates handled later) 370 */ 371 vs->aVtx = build_vs_add_input(vs, 372 key->position_t ? 
NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); 373 374 vs->aNrm = ureg_imm1f(ureg, 0.0f); 375 if (has_aNrm) 376 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); 377 378 vs->aCol[0] = ureg_imm1f(ureg, 1.0f); 379 vs->aCol[1] = ureg_imm1f(ureg, 0.0f); 380 381 if (key->lighting || key->darkness) { 382 const unsigned mask = key->mtl_diffuse | key->mtl_specular | 383 key->mtl_ambient | key->mtl_emissive; 384 if ((mask & 0x1) && !key->color0in_one) 385 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 386 if ((mask & 0x2) && !key->color1in_zero) 387 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 388 389 vs->mtlD = MATERIAL_CONST(1); 390 vs->mtlA = MATERIAL_CONST(2); 391 vs->mtlS = MATERIAL_CONST(3); 392 vs->mtlE = MATERIAL_CONST(5); 393 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else 394 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1]; 395 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else 396 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1]; 397 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else 398 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1]; 399 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else 400 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1]; 401 } else { 402 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 403 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 404 } 405 406 if (key->vertexpointsize) 407 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); 408 409 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) 410 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); 411 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) 412 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); 413 if (key->vertextween) { 414 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); 415 vs->aNrm1 = 
build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1)); 416 } 417 418 /* Declare outputs: 419 */ 420 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ 421 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); 422 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); 423 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 424 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0); 425 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); 426 } 427 428 if (key->vertexpointsize || key->pointscale) { 429 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, 430 TGSI_WRITEMASK_X, 0, 1); 431 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); 432 } 433 434 if (key->lighting || key->vertexblend) 435 AR = ureg_DECL_address(ureg); 436 437 /* === Vertex transformation / vertex blending: 438 */ 439 440 if (key->position_t) { 441 if (device->driver_caps.window_space_position_support) { 442 ureg_MOV(ureg, oPos, vs->aVtx); 443 } else { 444 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 445 /* vs->aVtx contains the coordinates buffer wise. 446 * later in the pipeline, clipping, viewport and division 447 * by w (rhw = 1/w) are going to be applied, so do the reverse 448 * of these transformations (except clipping) to have the good 449 * position at the end.*/ 450 ureg_MOV(ureg, tmp, vs->aVtx); 451 /* X from [X_min, X_min + width] to [-1, 1], same for Y. 
Z to [0, 1] */ 452 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); 453 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); 454 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 455 /* Y needs to be reversed */ 456 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); 457 /* inverse rhw */ 458 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp)); 459 /* multiply X, Y, Z by w */ 460 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); 461 ureg_MOV(ureg, oPos, ureg_src(tmp)); 462 ureg_release_temporary(ureg, tmp); 463 } 464 } else if (key->vertexblend) { 465 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 466 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 467 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 468 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 469 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); 470 struct ureg_src cWM[4]; 471 472 for (i = 160; i <= 195; ++i) 473 ureg_DECL_constant(ureg, i); 474 475 /* translate world matrix index to constant file index */ 476 if (key->vertexblend_indexed) { 477 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f)); 478 ureg_ARL(ureg, AR, ureg_src(tmp)); 479 } 480 481 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 482 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 483 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); 484 485 for (i = 0; i < key->vertexblend; ++i) { 486 for (c = 0; c < 4; ++c) { 487 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c); 488 if (key->vertexblend_indexed) 489 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); 490 } 491 492 /* multiply by WORLD(index) */ 493 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); 494 ureg_MAD(ureg, 
tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); 495 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); 496 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); 497 498 if (has_aNrm) { 499 /* Note: the spec says the transpose of the inverse of the 500 * WorldView matrices should be used, but all tests show 501 * otherwise. 502 * Only case unknown: D3DVBF_0WEIGHTS */ 503 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]); 504 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2)); 505 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2)); 506 } 507 508 if (i < (key->vertexblend - 1)) { 509 /* accumulate weighted position value */ 510 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); 511 if (has_aNrm) 512 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); 513 /* subtract weighted position value for last value */ 514 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); 515 } 516 } 517 518 /* the last weighted position is always 1 - sum_of_previous_weights */ 519 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); 520 if (has_aNrm) 521 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst)); 522 523 /* multiply by VIEW_PROJ */ 524 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); 525 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); 526 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); 527 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); 528 529 if (need_aVtx) 530 vs->aVtx = ureg_src(aVtx_dst); 531 532 ureg_release_temporary(ureg, tmp); 533 ureg_release_temporary(ureg, tmp2); 534 ureg_release_temporary(ureg, sum_blendweights); 535 if (!need_aVtx) 536 ureg_release_temporary(ureg, aVtx_dst); 537 538 if (has_aNrm) { 539 if (key->normalizenormals) 540 
ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 541 vs->aNrm = ureg_src(aNrm_dst); 542 } else 543 ureg_release_temporary(ureg, aNrm_dst); 544 } else { 545 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 546 547 if (key->vertextween) { 548 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 549 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx); 550 vs->aVtx = ureg_src(aVtx_dst); 551 if (has_aNrm) { 552 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 553 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm); 554 vs->aNrm = ureg_src(aNrm_dst); 555 } 556 } 557 558 /* position = vertex * WORLD_VIEW_PROJ */ 559 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); 560 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); 561 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); 562 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); 563 ureg_release_temporary(ureg, tmp); 564 565 if (need_aVtx) { 566 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 567 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); 568 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); 569 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); 570 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); 571 vs->aVtx = ureg_src(aVtx_dst); 572 } 573 if (has_aNrm) { 574 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 575 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); 576 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); 577 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst)); 578 if (key->normalizenormals) 579 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 580 vs->aNrm = ureg_src(aNrm_dst); 581 } 582 } 583 584 /* === Process point size: 585 */ 586 if (key->vertexpointsize || key->pointscale) { 587 struct ureg_dst tmp = 
ureg_DECL_temporary(ureg); 588 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 589 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 590 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 591 if (key->vertexpointsize) { 592 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 593 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); 594 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); 595 } else { 596 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 597 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); 598 } 599 600 if (key->pointscale) { 601 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 602 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); 603 604 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 605 ureg_RSQ(ureg, tmp_y, _X(tmp)); 606 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); 607 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); 608 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); 609 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); 610 ureg_RSQ(ureg, tmp_x, _X(tmp)); 611 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); 612 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); 613 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); 614 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); 615 } 616 617 ureg_MOV(ureg, oPsz, _Z(tmp)); 618 ureg_release_temporary(ureg, tmp); 619 } 620 621 for (i = 0; i < 8; ++i) { 622 struct ureg_dst tmp, tmp_x, tmp2; 623 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed; 624 unsigned c, writemask; 625 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; 626 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; 627 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); 628 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; 629 630 /* No texture output of index s */ 631 if (tci == NINED3DTSS_TCI_DISABLE) 632 continue; 633 oTex = ureg_DECL_output(ureg, texcoord_sn, i); 634 tmp = ureg_DECL_temporary(ureg); 635 tmp_x = ureg_writemask(tmp, 
TGSI_WRITEMASK_X); 636 input_coord = ureg_DECL_temporary(ureg); 637 transformed = ureg_DECL_temporary(ureg); 638 639 /* Get the coordinate */ 640 switch (tci) { 641 case NINED3DTSS_TCI_PASSTHRU: 642 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * 643 * Else the idx is used only to determine wrapping mode. */ 644 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); 645 ureg_MOV(ureg, input_coord, vs->aTex[idx]); 646 break; 647 case NINED3DTSS_TCI_CAMERASPACENORMAL: 648 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); 649 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 650 dim_input = 4; 651 break; 652 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 653 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 654 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 655 dim_input = 4; 656 break; 657 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 658 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 659 aVtx_normed = ureg_DECL_temporary(ureg); 660 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 661 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 662 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 663 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 664 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 665 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 666 ureg_release_temporary(ureg, aVtx_normed); 667 dim_input = 4; 668 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 669 break; 670 case NINED3DTSS_TCI_SPHEREMAP: 671 /* Implement the formula of GL_SPHERE_MAP */ 672 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 673 aVtx_normed = ureg_DECL_temporary(ureg); 674 tmp2 = ureg_DECL_temporary(ureg); 675 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 676 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 677 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 678 
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 679 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 680 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ 681 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); 682 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); 683 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2)); 684 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2)); 685 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f)); 686 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2) 687 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */ 688 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2)); 689 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); 690 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 691 ureg_release_temporary(ureg, aVtx_normed); 692 ureg_release_temporary(ureg, tmp2); 693 dim_input = 4; 694 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 695 break; 696 default: 697 assert(0); 698 break; 699 } 700 701 /* Apply the transformation */ 702 /* dim_output == 0 => do not transform the components. 
703 * XYZRHW also disables transformation */ 704 if (!dim_output || key->position_t) { 705 ureg_release_temporary(ureg, transformed); 706 transformed = input_coord; 707 writemask = TGSI_WRITEMASK_XYZW; 708 } else { 709 for (c = 0; c < dim_output; c++) { 710 t = ureg_writemask(transformed, 1 << c); 711 switch (dim_input) { 712 /* dim_input = 1 2 3: -> we add trailing 1 to input*/ 713 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); 714 break; 715 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 716 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); 717 break; 718 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 719 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); 720 break; 721 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; 722 default: 723 assert(0); 724 } 725 } 726 writemask = (1 << dim_output) - 1; 727 ureg_release_temporary(ureg, input_coord); 728 } 729 730 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); 731 ureg_release_temporary(ureg, transformed); 732 ureg_release_temporary(ureg, tmp); 733 } 734 735 /* === Lighting: 736 * 737 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation. 738 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation. 739 * SPOT: Finite distance, divergent rays, angular dependence, attenuation. 
740 * 741 * vec3 normal = normalize(in.Normal * NormalMatrix); 742 * vec3 hitDir = light.direction; 743 * float atten = 1.0; 744 * 745 * if (light.type != DIRECTIONAL) 746 * { 747 * vec3 hitVec = light.position - eyeVertex; 748 * float d = length(hitVec); 749 * hitDir = hitVec / d; 750 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0); 751 * } 752 * 753 * if (light.type == SPOTLIGHT) 754 * { 755 * float rho = dp3(-hitVec, light.direction); 756 * if (rho < cos(light.phi / 2)) 757 * atten = 0; 758 * if (rho < cos(light.theta / 2)) 759 * atten *= pow(some_func(rho), light.falloff); 760 * } 761 * 762 * float nDotHit = dp3_sat(normal, hitVec); 763 * float powFact = 0.0; 764 * 765 * if (nDotHit > 0.0) 766 * { 767 * vec3 midVec = normalize(hitDir + eye); 768 * float nDotMid = dp3_sat(normal, midVec); 769 * pFact = pow(nDotMid, material.power); 770 * } 771 * 772 * ambient += light.ambient * atten; 773 * diffuse += light.diffuse * atten * nDotHit; 774 * specular += light.specular * atten * powFact; 775 */ 776 if (key->lighting) { 777 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 778 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 779 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 780 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 781 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 782 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 783 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 784 785 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 786 787 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); 788 789 /* Light.*.Alpha is not used. 
*/ 790 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 791 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 792 struct ureg_dst rS = ureg_DECL_temporary(ureg); 793 794 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); 795 796 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0)); 797 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0)); 798 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0)); 799 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0)); 800 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1)); 801 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2)); 802 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3)); 803 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4)); 804 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4)); 805 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5)); 806 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5)); 807 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6)); 808 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6)); 809 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6)); 810 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7)); 811 812 const unsigned loop_label = l++; 813 814 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ 815 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); 816 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); 817 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f)); 818 819 /* loop management */ 820 ureg_BGNLOOP(ureg, &label[loop_label]); 821 ureg_ARL(ureg, AL, _W(rCtr)); 822 823 /* if (not DIRECTIONAL light): */ 824 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL)); 825 ureg_MOV(ureg, rHit, ureg_negate(cLDir)); 826 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f)); 827 ureg_IF(ureg, _X(tmp), &label[l++]); 828 { 829 /* hitDir = light.position - eyeVtx 830 * d = length(hitDir) 831 */ 832 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); 833 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); 834 ureg_RSQ(ureg, tmp_y, _X(tmp)); 835 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ 836 837 /* att = 1.0 / 
(light.att0 + (light.att1 + light.att2 * d) * d) */ 838 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1); 839 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0); 840 ureg_RCP(ureg, rAtt, _W(rAtt)); 841 /* cut-off if distance exceeds Light.Range */ 842 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng); 843 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 844 } 845 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 846 ureg_ENDIF(ureg); 847 848 /* normalize hitDir */ 849 ureg_normalize3(ureg, rHit, ureg_src(rHit)); 850 851 /* if (SPOT light) */ 852 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); 853 ureg_IF(ureg, _X(tmp), &label[l++]); 854 { 855 /* rho = dp3(-hitDir, light.spotDir) 856 * 857 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi 858 * spotAtt = 1 859 * else 860 * if (rho <= light.cphi2) 861 * spotAtt = 0 862 * else 863 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff 864 */ 865 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ 866 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); 867 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); 868 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ 869 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ 870 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */ 871 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp)); 872 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 873 } 874 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 875 ureg_ENDIF(ureg); 876 877 /* directional factors, let's not use LIT because of clarity */ 878 879 if (has_aNrm) { 880 if (key->localviewer) { 881 ureg_normalize3(ureg, rMid, vs->aVtx); 882 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); 883 } else { 884 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); 885 } 886 ureg_normalize3(ureg, rMid, ureg_src(rMid)); 887 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); 888 
ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 889 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp)); 890 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0). 891 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0). 892 * No tests were made for backfacing, so add the two conditions */ 893 ureg_IF(ureg, _Z(tmp), &label[l++]); 894 { 895 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 896 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); 897 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */ 898 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */ 899 } 900 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 901 ureg_ENDIF(ureg); 902 903 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ 904 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */ 905 } 906 907 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */ 908 909 /* break if this was the last light */ 910 ureg_IF(ureg, cLLast, &label[l++]); 911 ureg_BRK(ureg); 912 ureg_ENDIF(ureg); 913 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 914 915 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f)); 916 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg)); 917 ureg_ENDLOOP(ureg, &label[loop_label]); 918 919 /* Apply to material: 920 * 921 * oCol[0] = (material.emissive + material.ambient * rs.ambient) + 922 * material.ambient * ambient + 923 * material.diffuse * diffuse + 924 * oCol[1] = material.specular * specular; 925 */ 926 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 927 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19)); 928 else { 929 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25)); 930 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); 931 } 932 933 
ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp)); 934 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 935 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); 936 ureg_release_temporary(ureg, rAtt); 937 ureg_release_temporary(ureg, rHit); 938 ureg_release_temporary(ureg, rMid); 939 ureg_release_temporary(ureg, rCtr); 940 ureg_release_temporary(ureg, rD); 941 ureg_release_temporary(ureg, rA); 942 ureg_release_temporary(ureg, rS); 943 ureg_release_temporary(ureg, rAtt); 944 ureg_release_temporary(ureg, tmp); 945 } else 946 /* COLOR */ 947 if (key->darkness) { 948 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 949 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19)); 950 else 951 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); 952 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 953 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f)); 954 } else { 955 ureg_MOV(ureg, oCol[0], vs->aCol[0]); 956 ureg_MOV(ureg, oCol[1], vs->aCol[1]); 957 } 958 959 /* === Process fog. 
960 * 961 * exp(x) = ex2(log2(e) * x) 962 */ 963 if (key->fog_mode) { 964 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 965 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 966 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 967 if (key->fog_range) { 968 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 969 ureg_RSQ(ureg, tmp_z, _X(tmp)); 970 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); 971 } else { 972 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); 973 } 974 975 if (key->fog_mode == D3DFOG_EXP) { 976 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 977 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 978 ureg_EX2(ureg, tmp_x, _X(tmp)); 979 } else 980 if (key->fog_mode == D3DFOG_EXP2) { 981 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 982 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp)); 983 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 984 ureg_EX2(ureg, tmp_x, _X(tmp)); 985 } else 986 if (key->fog_mode == D3DFOG_LINEAR) { 987 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); 988 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); 989 } 990 ureg_MOV(ureg, oFog, _X(tmp)); 991 ureg_release_temporary(ureg, tmp); 992 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { 993 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); 994 } 995 996 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { 997 struct ureg_src input; 998 struct ureg_dst output; 999 input = vs->aWgt; 1000 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18); 1001 ureg_MOV(ureg, output, input); 1002 } 1003 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { 1004 struct ureg_src input; 1005 struct ureg_dst output; 1006 input = vs->aInd; 1007 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); 1008 ureg_MOV(ureg, output, input); 1009 } 1010 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { 1011 struct ureg_src input; 1012 struct ureg_dst output; 1013 input = 
vs->aNrm; 1014 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); 1015 ureg_MOV(ureg, output, input); 1016 } 1017 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { 1018 struct ureg_src input; 1019 struct ureg_dst output; 1020 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); 1021 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); 1022 ureg_MOV(ureg, output, input); 1023 } 1024 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { 1025 struct ureg_src input; 1026 struct ureg_dst output; 1027 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); 1028 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); 1029 ureg_MOV(ureg, output, input); 1030 } 1031 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 1032 struct ureg_src input; 1033 struct ureg_dst output; 1034 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); 1035 input = ureg_scalar(input, TGSI_SWIZZLE_X); 1036 output = oFog; 1037 ureg_MOV(ureg, output, input); 1038 } 1039 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { 1040 (void) 0; /* TODO: replace z of position output ? */ 1041 } 1042 1043 /* ucp for ff applies on world coordinates. 1044 * aVtx is in worldview coordinates. 
*/ 1045 if (key->ucp) { 1046 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0); 1047 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1048 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12)); 1049 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp)); 1050 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp)); 1051 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp)); 1052 ureg_release_temporary(ureg, tmp); 1053 } 1054 1055 if (key->position_t && device->driver_caps.window_space_position_support) 1056 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 1057 1058 ureg_END(ureg); 1059 nine_ureg_tgsi_dump(ureg, FALSE); 1060 return ureg_create_shader_and_destroy(ureg, device->context.pipe); 1061 } 1062 1063 /* PS FF constants layout: 1064 * 1065 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT 1066 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00 1067 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01 1068 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10 1069 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11 1070 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE 1071 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET 1072 * 1073 * CONST[20] D3DRS_TEXTUREFACTOR 1074 * CONST[21] D3DRS_FOGCOLOR 1075 * CONST[22].x___ RS.FogEnd 1076 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 1077 * CONST[22].__z_ RS.FogDensity 1078 */ 1079 struct ps_build_ctx 1080 { 1081 struct ureg_program *ureg; 1082 1083 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */ 1084 struct ureg_src vT[8]; /* TEXCOORD[i] */ 1085 struct ureg_dst rCur; /* D3DTA_CURRENT */ 1086 struct ureg_dst rMod; 1087 struct ureg_src rCurSrc; 1088 struct ureg_dst rTmp; /* D3DTA_TEMP */ 1089 struct ureg_src rTmpSrc; 1090 struct ureg_dst rTex; 1091 struct ureg_src rTexSrc; 1092 struct ureg_src cBEM[8]; 1093 struct ureg_src s[8]; 1094 1095 struct { 1096 unsigned index; 1097 unsigned index_pre_mod; 1098 } stage; 1099 }; 1100 1101 static struct ureg_src 1102
ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) 1103 { 1104 struct ureg_src reg; 1105 1106 switch (ta & D3DTA_SELECTMASK) { 1107 case D3DTA_CONSTANT: 1108 reg = ureg_DECL_constant(ps->ureg, ps->stage.index); 1109 break; 1110 case D3DTA_CURRENT: 1111 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; 1112 break; 1113 case D3DTA_DIFFUSE: 1114 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1115 break; 1116 case D3DTA_SPECULAR: 1117 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1118 break; 1119 case D3DTA_TEMP: 1120 reg = ps->rTmpSrc; 1121 break; 1122 case D3DTA_TEXTURE: 1123 reg = ps->rTexSrc; 1124 break; 1125 case D3DTA_TFACTOR: 1126 reg = ureg_DECL_constant(ps->ureg, 20); 1127 break; 1128 default: 1129 assert(0); 1130 reg = ureg_src_undef(); 1131 break; 1132 } 1133 if (ta & D3DTA_COMPLEMENT) { 1134 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); 1135 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); 1136 reg = ureg_src(dst); 1137 } 1138 if (ta & D3DTA_ALPHAREPLICATE) 1139 reg = _WWWW(reg); 1140 return reg; 1141 } 1142 1143 static struct ureg_dst 1144 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta) 1145 { 1146 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE))); 1147 1148 switch (ta & D3DTA_SELECTMASK) { 1149 case D3DTA_CURRENT: 1150 return ps->rCur; 1151 case D3DTA_TEMP: 1152 return ps->rTmp; 1153 default: 1154 assert(0); 1155 return ureg_dst_undef(); 1156 } 1157 } 1158 1159 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top) 1160 { 1161 switch (top) { 1162 case D3DTOP_DISABLE: 1163 return 0x0; 1164 case D3DTOP_SELECTARG1: 1165 case D3DTOP_PREMODULATE: 1166 return 0x2; 1167 case D3DTOP_SELECTARG2: 1168 return 0x4; 1169 case D3DTOP_MULTIPLYADD: 1170 case D3DTOP_LERP: 1171 return 0x7; 1172 default: 1173 return 0x6; 1174 } 1175 } 1176 1177 static inline boolean 1178 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src) 
1179 { 1180 return !dst.WriteMask || 1181 (dst.File == src.File && 1182 dst.Index == src.Index && 1183 !dst.Indirect && 1184 !dst.Saturate && 1185 !src.Indirect && 1186 !src.Negate && 1187 !src.Absolute && 1188 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) && 1189 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) && 1190 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) && 1191 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W))); 1192 1193 } 1194 1195 static void 1196 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg) 1197 { 1198 struct ureg_program *ureg = ps->ureg; 1199 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1200 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 1201 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 1202 1203 tmp.WriteMask = dst.WriteMask; 1204 1205 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 && 1206 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE && 1207 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA && 1208 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA && 1209 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE && 1210 top != D3DTOP_LERP) 1211 dst = ureg_saturate(dst); 1212 1213 switch (top) { 1214 case D3DTOP_SELECTARG1: 1215 if (!is_MOV_no_op(dst, arg[1])) 1216 ureg_MOV(ureg, dst, arg[1]); 1217 break; 1218 case D3DTOP_SELECTARG2: 1219 if (!is_MOV_no_op(dst, arg[2])) 1220 ureg_MOV(ureg, dst, arg[2]); 1221 break; 1222 case D3DTOP_MODULATE: 1223 ureg_MUL(ureg, dst, arg[1], arg[2]); 1224 break; 1225 case D3DTOP_MODULATE2X: 1226 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1227 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp)); 1228 break; 1229 case D3DTOP_MODULATE4X: 1230 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1231 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f)); 1232 break; 1233 case D3DTOP_ADD: 1234 ureg_ADD(ureg, 
dst, arg[1], arg[2]); 1235 break; 1236 case D3DTOP_ADDSIGNED: 1237 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1238 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); 1239 break; 1240 case D3DTOP_ADDSIGNED2X: 1241 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1242 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1243 break; 1244 case D3DTOP_SUBTRACT: 1245 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); 1246 break; 1247 case D3DTOP_ADDSMOOTH: 1248 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1249 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); 1250 break; 1251 case D3DTOP_BLENDDIFFUSEALPHA: 1252 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]); 1253 break; 1254 case D3DTOP_BLENDTEXTUREALPHA: 1255 /* XXX: alpha taken from previous stage, texture or result ? */ 1256 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]); 1257 break; 1258 case D3DTOP_BLENDFACTORALPHA: 1259 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); 1260 break; 1261 case D3DTOP_BLENDTEXTUREALPHAPM: 1262 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); 1263 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); 1264 break; 1265 case D3DTOP_BLENDCURRENTALPHA: 1266 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]); 1267 break; 1268 case D3DTOP_PREMODULATE: 1269 ureg_MOV(ureg, dst, arg[1]); 1270 ps->stage.index_pre_mod = ps->stage.index + 1; 1271 break; 1272 case D3DTOP_MODULATEALPHA_ADDCOLOR: 1273 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]); 1274 break; 1275 case D3DTOP_MODULATECOLOR_ADDALPHA: 1276 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); 1277 break; 1278 case D3DTOP_MODULATEINVALPHA_ADDCOLOR: 1279 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); 1280 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); 1281 break; 1282 case D3DTOP_MODULATEINVCOLOR_ADDALPHA: 1283 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1284 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], 
_WWWW(arg[1])); 1285 break; 1286 case D3DTOP_BUMPENVMAP: 1287 break; 1288 case D3DTOP_BUMPENVMAPLUMINANCE: 1289 break; 1290 case D3DTOP_DOTPRODUCT3: 1291 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1292 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1293 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); 1294 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); 1295 break; 1296 case D3DTOP_MULTIPLYADD: 1297 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]); 1298 break; 1299 case D3DTOP_LERP: 1300 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]); 1301 break; 1302 case D3DTOP_DISABLE: 1303 /* no-op ? */ 1304 break; 1305 default: 1306 assert(!"invalid D3DTOP"); 1307 break; 1308 } 1309 ureg_release_temporary(ureg, tmp); 1310 ureg_release_temporary(ureg, tmp2); 1311 } 1312 1313 static void * 1314 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) 1315 { 1316 struct ps_build_ctx ps; 1317 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT); 1318 struct ureg_dst oCol; 1319 unsigned s; 1320 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 1321 1322 memset(&ps, 0, sizeof(ps)); 1323 ps.ureg = ureg; 1324 ps.stage.index_pre_mod = -1; 1325 1326 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1327 1328 ps.rCur = ureg_DECL_temporary(ureg); 1329 ps.rTmp = ureg_DECL_temporary(ureg); 1330 ps.rTex = ureg_DECL_temporary(ureg); 1331 ps.rCurSrc = ureg_src(ps.rCur); 1332 ps.rTmpSrc = ureg_src(ps.rTmp); 1333 ps.rTexSrc = ureg_src(ps.rTex); 1334 1335 /* Initial values */ 1336 ureg_MOV(ureg, ps.rCur, ps.vC[0]); 1337 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f)); 1338 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f)); 1339 1340 for (s = 0; s < 8; ++s) { 1341 ps.s[s] = ureg_src_undef(); 1342 1343 if (key->ts[s].colorop != D3DTOP_DISABLE) { 1344 if (key->ts[s].colorarg0 == D3DTA_SPECULAR || 1345 key->ts[s].colorarg1 == D3DTA_SPECULAR || 1346 
key->ts[s].colorarg2 == D3DTA_SPECULAR) 1347 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1348 1349 if (key->ts[s].colorarg0 == D3DTA_TEXTURE || 1350 key->ts[s].colorarg1 == D3DTA_TEXTURE || 1351 key->ts[s].colorarg2 == D3DTA_TEXTURE) { 1352 ps.s[s] = ureg_DECL_sampler(ureg, s); 1353 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1354 } 1355 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE || 1356 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE)) 1357 ps.s[s] = ureg_DECL_sampler(ureg, s); 1358 } 1359 1360 if (key->ts[s].alphaop != D3DTOP_DISABLE) { 1361 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || 1362 key->ts[s].alphaarg1 == D3DTA_SPECULAR || 1363 key->ts[s].alphaarg2 == D3DTA_SPECULAR) 1364 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1365 1366 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || 1367 key->ts[s].alphaarg1 == D3DTA_TEXTURE || 1368 key->ts[s].alphaarg2 == D3DTA_TEXTURE) { 1369 ps.s[s] = ureg_DECL_sampler(ureg, s); 1370 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1371 } 1372 } 1373 } 1374 if (key->specular) 1375 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1376 1377 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 1378 1379 /* Run stages. 
1380 */ 1381 for (s = 0; s < 8; ++s) { 1382 unsigned colorarg[3]; 1383 unsigned alphaarg[3]; 1384 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop); 1385 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop); 1386 struct ureg_dst dst; 1387 struct ureg_src arg[3]; 1388 1389 if (key->ts[s].colorop == D3DTOP_DISABLE) { 1390 assert (key->ts[s].alphaop == D3DTOP_DISABLE); 1391 continue; 1392 } 1393 ps.stage.index = s; 1394 1395 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s, 1396 nine_D3DTOP_to_str(key->ts[s].colorop), 1397 nine_D3DTOP_to_str(key->ts[s].alphaop)); 1398 1399 if (!ureg_src_is_undef(ps.s[s])) { 1400 unsigned target; 1401 struct ureg_src texture_coord = ps.vT[s]; 1402 struct ureg_dst delta; 1403 switch (key->ts[s].textarget) { 1404 case 0: target = TGSI_TEXTURE_1D; break; 1405 case 1: target = TGSI_TEXTURE_2D; break; 1406 case 2: target = TGSI_TEXTURE_3D; break; 1407 case 3: target = TGSI_TEXTURE_CUBE; break; 1408 /* this is a 2 bit bitfield, do I really need a default case ? 
*/ 1409 } 1410 1411 /* Modify coordinates */ 1412 if (s >= 1 && 1413 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP || 1414 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) { 1415 delta = ureg_DECL_temporary(ureg); 1416 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */ 1417 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1))); 1418 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta)); 1419 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */ 1420 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1))); 1421 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta)); 1422 texture_coord = ureg_src(ureg_DECL_temporary(ureg)); 1423 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]); 1424 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta)); 1425 /* Prepare luminance multiplier 1426 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */ 1427 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { 1428 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2)); 1429 struct ureg_src bumpenvloffset = ((s-1) & 1) ? 
_WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2)); 1430 1431 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset); 1432 } 1433 } 1434 if (key->projected & (3 << (s *2))) { 1435 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); 1436 if (dim == 4) 1437 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1438 else { 1439 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1440 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1)); 1441 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord); 1442 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); 1443 ureg_release_temporary(ureg, tmp); 1444 } 1445 } else { 1446 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1447 } 1448 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1449 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta)); 1450 } 1451 1452 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || 1453 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1454 continue; 1455 1456 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? 
D3DTA_TEMP : D3DTA_CURRENT); 1457 1458 if (ps.stage.index_pre_mod == ps.stage.index) { 1459 ps.rMod = ureg_DECL_temporary(ureg); 1460 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); 1461 } 1462 1463 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; 1464 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; 1465 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; 1466 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; 1467 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; 1468 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; 1469 1470 if (key->ts[s].colorop != key->ts[s].alphaop || 1471 colorarg[0] != alphaarg[0] || 1472 colorarg[1] != alphaarg[1] || 1473 colorarg[2] != alphaarg[2]) 1474 dst.WriteMask = TGSI_WRITEMASK_XYZ; 1475 1476 /* Special DOTPRODUCT behaviour (see wine tests) */ 1477 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) 1478 dst.WriteMask = TGSI_WRITEMASK_XYZW; 1479 1480 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); 1481 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); 1482 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); 1483 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg); 1484 1485 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) { 1486 dst.WriteMask = TGSI_WRITEMASK_W; 1487 1488 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]); 1489 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]); 1490 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]); 1491 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg); 1492 } 1493 } 1494 1495 if (key->specular) 1496 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]); 1497 1498 /* Fog. 
1499 */ 1500 if (key->fog_mode) { 1501 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X); 1502 struct ureg_src vPos; 1503 if (device->screen->get_param(device->screen, 1504 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { 1505 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 1506 } else { 1507 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 1508 TGSI_INTERPOLATE_LINEAR); 1509 } 1510 1511 /* Source is either W or Z. 1512 * When we use vs ff, 1513 * Z is when an orthogonal projection matrix is detected, 1514 * W (WFOG) else. 1515 * Z is used for programmable vs. 1516 * Note: Tests indicate that the projection matrix coefficients do 1517 * actually affect pixel fog (and not vertex fog) when vs ff is used, 1518 * which justifies taking the position's w instead of taking the z coordinate 1519 * before the projection in the vs shader. 1520 */ 1521 if (!key->fog_source) 1522 ureg_MOV(ureg, rFog, _ZZZZ(vPos)); 1523 else 1524 /* Position's w is 1/w */ 1525 ureg_RCP(ureg, rFog, _WWWW(vPos)); 1526 1527 if (key->fog_mode == D3DFOG_EXP) { 1528 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1529 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1530 ureg_EX2(ureg, rFog, _X(rFog)); 1531 } else 1532 if (key->fog_mode == D3DFOG_EXP2) { 1533 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1534 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog)); 1535 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1536 ureg_EX2(ureg, rFog, _X(rFog)); 1537 } else 1538 if (key->fog_mode == D3DFOG_LINEAR) { 1539 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); 1540 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); 1541 } 1542 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); 1543 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1544 } else 1545 if (key->fog) { 1546 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, 
TGSI_INTERPOLATE_PERSPECTIVE); 1547 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); 1548 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1549 } else { 1550 ureg_MOV(ureg, oCol, ps.rCurSrc); 1551 } 1552 1553 ureg_END(ureg); 1554 nine_ureg_tgsi_dump(ureg, FALSE); 1555 return ureg_create_shader_and_destroy(ureg, device->context.pipe); 1556 } 1557 1558 static struct NineVertexShader9 * 1559 nine_ff_get_vs(struct NineDevice9 *device) 1560 { 1561 const struct nine_context *context = &device->context; 1562 struct NineVertexShader9 *vs; 1563 enum pipe_error err; 1564 struct vs_build_ctx bld; 1565 struct nine_ff_vs_key key; 1566 unsigned s, i; 1567 boolean has_indexes = false; 1568 boolean has_weights = false; 1569 char input_texture_coord[8]; 1570 1571 assert(sizeof(key) <= sizeof(key.value32)); 1572 1573 memset(&key, 0, sizeof(key)); 1574 memset(&bld, 0, sizeof(bld)); 1575 memset(&input_texture_coord, 0, sizeof(input_texture_coord)); 1576 1577 bld.key = &key; 1578 1579 /* FIXME: this shouldn't be NULL, but it is on init */ 1580 if (context->vdecl) { 1581 key.color0in_one = 1; 1582 key.color1in_zero = 1; 1583 for (i = 0; i < context->vdecl->nelems; i++) { 1584 uint16_t usage = context->vdecl->usage_map[i]; 1585 if (usage == NINE_DECLUSAGE_POSITIONT) 1586 key.position_t = 1; 1587 else if (usage == NINE_DECLUSAGE_i(COLOR, 0)) 1588 key.color0in_one = 0; 1589 else if (usage == NINE_DECLUSAGE_i(COLOR, 1)) 1590 key.color1in_zero = 0; 1591 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) { 1592 has_indexes = true; 1593 key.passthrough |= 1 << usage; 1594 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) { 1595 has_weights = true; 1596 key.passthrough |= 1 << usage; 1597 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) { 1598 key.has_normal = 1; 1599 key.passthrough |= 1 << usage; 1600 } else if (usage == NINE_DECLUSAGE_PSIZE) 1601 key.vertexpointsize = 1; 1602 else if (usage % NINE_DECLUSAGE_COUNT == 
NINE_DECLUSAGE_TEXCOORD) { 1603 s = usage / NINE_DECLUSAGE_COUNT; 1604 if (s < 8) 1605 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type); 1606 else 1607 DBG("FF given texture coordinate >= 8. Ignoring\n"); 1608 } else if (usage < NINE_DECLUSAGE_NONE) 1609 key.passthrough |= 1 << usage; 1610 } 1611 } 1612 /* ff vs + ps 3.0: some elements are passed to the ps (wine test). 1613 * We do restrict to indices 0 */ 1614 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | 1615 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | 1616 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); 1617 if (!key.position_t) 1618 key.passthrough = 0; 1619 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE]; 1620 1621 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active; 1622 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active; 1623 if (key.position_t) { 1624 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */ 1625 key.lighting = 0; 1626 } 1627 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) { 1628 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2); 1629 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask; 1630 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask; 1631 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask; 1632 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask; 1633 } 1634 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1635 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? 
context->rs[D3DRS_FOGVERTEXMODE] : 0; 1636 if (key.fog_mode) 1637 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE]; 1638 1639 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER]; 1640 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS]; 1641 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE]; 1642 1643 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1644 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes; 1645 1646 switch (context->rs[D3DRS_VERTEXBLEND]) { 1647 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break; 1648 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break; 1649 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break; 1650 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break; 1651 case D3DVBF_TWEENING: key.vertextween = 1; break; 1652 default: 1653 assert(!"invalid D3DVBF"); 1654 break; 1655 } 1656 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) 1657 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */ 1658 } 1659 1660 for (s = 0; s < 8; ++s) { 1661 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; 1662 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7; 1663 unsigned dim; 1664 1665 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) 1666 gen = NINED3DTSS_TCI_PASSTHRU; 1667 1668 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU) 1669 gen = NINED3DTSS_TCI_DISABLE; 1670 1671 key.tc_gen |= gen << (s * 3); 1672 key.tc_idx |= idx << (s * 3); 1673 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2); 1674 1675 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; 1676 if (dim > 4) 1677 dim = input_texture_coord[idx]; 1678 if (dim == 1) /* NV behaviour */ 1679 dim = 0; 1680 key.tc_dim_output |= dim << (s * 3); 1681 } 1682 1683 vs = util_hash_table_get(device->ff.ht_vs, &key); 1684 if (vs) 1685 return vs; 1686 
NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld)); 1687 1688 nine_ff_prune_vs(device); 1689 if (vs) { 1690 unsigned n; 1691 1692 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); 1693 1694 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs); 1695 (void)err; 1696 assert(err == PIPE_OK); 1697 device->ff.num_vs++; 1698 NineUnknown_ConvertRefToBind(NineUnknown(vs)); 1699 1700 vs->num_inputs = bld.num_inputs; 1701 for (n = 0; n < bld.num_inputs; ++n) 1702 vs->input_map[n].ndecl = bld.input[n]; 1703 1704 vs->position_t = key.position_t; 1705 vs->point_size = key.vertexpointsize | key.pointscale; 1706 } 1707 return vs; 1708 } 1709 1710 #define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE) 1711 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32))) 1712 1713 static struct NinePixelShader9 * 1714 nine_ff_get_ps(struct NineDevice9 *device) 1715 { 1716 struct nine_context *context = &device->context; 1717 D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION); 1718 struct NinePixelShader9 *ps; 1719 enum pipe_error err; 1720 struct nine_ff_ps_key key; 1721 unsigned s; 1722 uint8_t sampler_mask = 0; 1723 1724 assert(sizeof(key) <= sizeof(key.value32)); 1725 1726 memset(&key, 0, sizeof(key)); 1727 for (s = 0; s < 8; ++s) { 1728 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP]; 1729 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP]; 1730 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop); 1731 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop); 1732 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. 1733 * ALPHAOP cannot be enabled if COLOROP is disabled. 1734 * Verified on Windows. 
*/ 1735 if (key.ts[s].colorop == D3DTOP_DISABLE) { 1736 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */ 1737 break; 1738 } 1739 1740 if (!context->texture[s].enabled && 1741 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && 1742 used_c & 0x1) || 1743 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && 1744 used_c & 0x2) || 1745 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE && 1746 used_c & 0x4))) { 1747 /* Tested on Windows: Invalid texture read disables the stage 1748 * and the subsequent ones, but only for colorop. For alpha, 1749 * it's as if the texture had alpha of 1.0, which is what 1750 * has our dummy texture in that case. Invalid color also 1751 * disabled the following alpha stages. */ 1752 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1753 break; 1754 } 1755 1756 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE || 1757 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE || 1758 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE || 1759 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE || 1760 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE || 1761 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE) 1762 sampler_mask |= (1 << s); 1763 1764 if (key.ts[s].colorop != D3DTOP_DISABLE) { 1765 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0]; 1766 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1]; 1767 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2]; 1768 if (used_c & 0x1) key.colorarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s; 1769 if (used_c & 0x1) key.colorarg_b5[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s; 1770 if (used_c & 0x2) key.colorarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s; 1771 if (used_c & 0x2) key.colorarg_b5[1] |= 
(context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s; 1772 if (used_c & 0x4) key.colorarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s; 1773 if (used_c & 0x4) key.colorarg_b5[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s; 1774 } 1775 if (key.ts[s].alphaop != D3DTOP_DISABLE) { 1776 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0]; 1777 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1]; 1778 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2]; 1779 if (used_a & 0x1) key.alphaarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s; 1780 if (used_a & 0x2) key.alphaarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s; 1781 if (used_a & 0x4) key.alphaarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s; 1782 } 1783 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; 1784 1785 if (context->texture[s].enabled) { 1786 switch (context->texture[s].type) { 1787 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; 1788 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; 1789 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; 1790 default: 1791 assert(!"unexpected texture type"); 1792 break; 1793 } 1794 } else { 1795 key.ts[s].textarget = 1; 1796 } 1797 } 1798 1799 /* Note: If colorop is D3DTOP_DISABLE for the first stage 1800 * (which implies alphaop is too), nothing particular happens, 1801 * that is, current is equal to diffuse (which is the case anyway, 1802 * because it is how it is initialized). 1803 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop, 1804 * because then if the resultarg is TEMP, then diffuse alpha is written 1805 * to it. 
*/ 1806 if (key.ts[0].colorop != D3DTOP_DISABLE && 1807 key.ts[0].alphaop == D3DTOP_DISABLE && 1808 key.ts[0].resultarg != 0) { 1809 key.ts[0].alphaop = D3DTOP_SELECTARG1; 1810 key.ts[0].alphaarg1 = D3DTA_DIFFUSE; 1811 } 1812 /* When no alpha stage writes to current, diffuse alpha is taken. 1813 * Since we initialize current to diffuse, we have the behaviour. */ 1814 1815 /* Last stage always writes to Current */ 1816 if (s >= 1) 1817 key.ts[s-1].resultarg = 0; 1818 1819 key.projected = nine_ff_get_projected_key(context); 1820 key.specular = !!context->rs[D3DRS_SPECULARENABLE]; 1821 1822 for (; s < 8; ++s) 1823 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1824 if (context->rs[D3DRS_FOGENABLE]) 1825 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE]; 1826 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1827 /* Pixel fog (with WFOG advertised): source is either Z or W. 1828 * W is the source if vs ff is used, and the 1829 * projection matrix is not orthogonal. 1830 * Tests on Win 10 seem to indicate _34 1831 * and _33 are checked against 0, 1. 
*/ 1832 if (key.fog_mode && key.fog) 1833 key.fog_source = !context->programmable_vs && 1834 !(projection_matrix->_34 == 0.0f && 1835 projection_matrix->_44 == 1.0f); 1836 1837 ps = util_hash_table_get(device->ff.ht_ps, &key); 1838 if (ps) 1839 return ps; 1840 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key)); 1841 1842 nine_ff_prune_ps(device); 1843 if (ps) { 1844 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); 1845 1846 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps); 1847 (void)err; 1848 assert(err == PIPE_OK); 1849 device->ff.num_ps++; 1850 NineUnknown_ConvertRefToBind(NineUnknown(ps)); 1851 1852 ps->rt_mask = 0x1; 1853 ps->sampler_mask = sampler_mask; 1854 } 1855 return ps; 1856 } 1857 1858 static void 1859 nine_ff_load_vs_transforms(struct NineDevice9 *device) 1860 { 1861 struct nine_context *context = &device->context; 1862 D3DMATRIX T; 1863 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1864 unsigned i; 1865 1866 /* TODO: make this nicer, and only upload the ones we need */ 1867 /* TODO: use ff.vs_const as storage of W, V, P matrices */ 1868 1869 if (IS_D3DTS_DIRTY(context, WORLD) || 1870 IS_D3DTS_DIRTY(context, VIEW) || 1871 IS_D3DTS_DIRTY(context, PROJECTION)) { 1872 /* WVP, WV matrices */ 1873 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW)); 1874 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION)); 1875 1876 /* normal matrix == transpose(inverse(WV)) */ 1877 nine_d3d_matrix_inverse(&T, &M[1]); 1878 nine_d3d_matrix_transpose(&M[4], &T); 1879 1880 /* P matrix */ 1881 M[2] = *GET_D3DTS(PROJECTION); 1882 1883 /* V and W matrix */ 1884 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW)); 1885 M[40] = M[1]; 1886 } 1887 1888 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1889 /* load other world matrices */ 1890 for (i = 1; i <= 8; ++i) { 1891 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW)); 1892 } 1893 } 1894 1895 device->ff.vs_const[30 * 4] = 
asfloat(context->rs[D3DRS_TWEENFACTOR]); 1896 } 1897 1898 static void 1899 nine_ff_load_lights(struct NineDevice9 *device) 1900 { 1901 struct nine_context *context = &device->context; 1902 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1903 unsigned l; 1904 1905 if (context->changed.group & NINE_STATE_FF_MATERIAL) { 1906 const D3DMATERIAL9 *mtl = &context->ff.material; 1907 1908 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float)); 1909 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float)); 1910 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float)); 1911 dst[23].x = mtl->Power; 1912 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float)); 1913 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]); 1914 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r; 1915 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g; 1916 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b; 1917 } 1918 1919 if (!(context->changed.group & NINE_STATE_FF_LIGHTING)) 1920 return; 1921 1922 for (l = 0; l < context->ff.num_lights_active; ++l) { 1923 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]]; 1924 1925 dst[32 + l * 8].x = light->Type; 1926 dst[32 + l * 8].y = light->Attenuation0; 1927 dst[32 + l * 8].z = light->Attenuation1; 1928 dst[32 + l * 8].w = light->Attenuation2; 1929 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse)); 1930 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular)); 1931 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient)); 1932 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW)); 1933 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW)); 1934 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 
1e9f : light->Range; 1935 dst[37 + l * 8].w = light->Falloff; 1936 dst[38 + l * 8].x = cosf(light->Theta * 0.5f); 1937 dst[38 + l * 8].y = cosf(light->Phi * 0.5f); 1938 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); 1939 dst[39 + l * 8].w = (l + 1) == context->ff.num_lights_active; 1940 } 1941 } 1942 1943 static void 1944 nine_ff_load_point_and_fog_params(struct NineDevice9 *device) 1945 { 1946 struct nine_context *context = &device->context; 1947 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1948 1949 if (!(context->changed.group & NINE_STATE_FF_OTHER)) 1950 return; 1951 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); 1952 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); 1953 dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]); 1954 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]); 1955 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]); 1956 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]); 1957 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]); 1958 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 1959 if (isinf(dst[28].y)) 1960 dst[28].y = 0.0f; 1961 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 1962 } 1963 1964 static void 1965 nine_ff_load_tex_matrices(struct NineDevice9 *device) 1966 { 1967 struct nine_context *context = &device->context; 1968 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1969 unsigned s; 1970 1971 if (!(context->ff.changed.transform[0] & 0xff0000)) 1972 return; 1973 for (s = 0; s < 8; ++s) { 1974 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s)) 1975 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE)); 1976 } 1977 } 1978 1979 static void 1980 nine_ff_load_ps_params(struct NineDevice9 *device) 1981 { 1982 struct nine_context *context = &device->context; 1983 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; 1984 unsigned s; 1985 1986 if (!(context->changed.group & 
(NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER))) 1987 return; 1988 1989 for (s = 0; s < 8; ++s) 1990 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]); 1991 1992 for (s = 0; s < 8; ++s) { 1993 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]); 1994 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]); 1995 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]); 1996 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]); 1997 if (s & 1) { 1998 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 1999 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2000 } else { 2001 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2002 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2003 } 2004 } 2005 2006 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]); 2007 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]); 2008 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]); 2009 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 2010 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 2011 } 2012 2013 static void 2014 nine_ff_load_viewport_info(struct NineDevice9 *device) 2015 { 2016 D3DVIEWPORT9 *viewport = &device->context.viewport; 2017 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 2018 float diffZ = viewport->MaxZ - viewport->MinZ; 2019 2020 /* Note: the other functions avoids to fill the const again if nothing changed. 2021 * But we don't have much to fill, and adding code to allow that may be complex 2022 * so just fill it always */ 2023 dst[100].x = 2.0f / (float)(viewport->Width); 2024 dst[100].y = 2.0f / (float)(viewport->Height); 2025 dst[100].z = (diffZ == 0.0f) ? 
0.0f : (1.0f / diffZ); 2026 dst[100].w = (float)(viewport->Width); 2027 dst[101].x = (float)(viewport->X); 2028 dst[101].y = (float)(viewport->Y); 2029 dst[101].z = (float)(viewport->MinZ); 2030 } 2031 2032 void 2033 nine_ff_update(struct NineDevice9 *device) 2034 { 2035 struct nine_context *context = &device->context; 2036 struct pipe_constant_buffer cb; 2037 2038 DBG("vs=%p ps=%p\n", context->vs, context->ps); 2039 2040 /* NOTE: the only reference belongs to the hash table */ 2041 if (!context->programmable_vs) { 2042 device->ff.vs = nine_ff_get_vs(device); 2043 context->changed.group |= NINE_STATE_VS; 2044 } 2045 if (!context->ps) { 2046 device->ff.ps = nine_ff_get_ps(device); 2047 context->changed.group |= NINE_STATE_PS; 2048 } 2049 2050 if (!context->programmable_vs) { 2051 nine_ff_load_vs_transforms(device); 2052 nine_ff_load_tex_matrices(device); 2053 nine_ff_load_lights(device); 2054 nine_ff_load_point_and_fog_params(device); 2055 nine_ff_load_viewport_info(device); 2056 2057 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform)); 2058 2059 cb.buffer_offset = 0; 2060 cb.buffer = NULL; 2061 cb.user_buffer = device->ff.vs_const; 2062 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); 2063 2064 if (!device->driver_caps.user_cbufs) { 2065 context->pipe_data.cb_vs_ff.buffer_size = cb.buffer_size; 2066 u_upload_data(device->constbuf_uploader, 2067 0, 2068 cb.buffer_size, 2069 device->constbuf_alignment, 2070 cb.user_buffer, 2071 &context->pipe_data.cb_vs_ff.buffer_offset, 2072 &context->pipe_data.cb_vs_ff.buffer); 2073 u_upload_unmap(device->constbuf_uploader); 2074 context->pipe_data.cb_vs_ff.user_buffer = NULL; 2075 } else 2076 context->pipe_data.cb_vs_ff = cb; 2077 context->commit |= NINE_STATE_COMMIT_CONST_VS; 2078 } 2079 2080 if (!context->ps) { 2081 nine_ff_load_ps_params(device); 2082 2083 cb.buffer_offset = 0; 2084 cb.buffer = NULL; 2085 cb.user_buffer = device->ff.ps_const; 2086 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * 
sizeof(float); 2087 2088 if (!device->driver_caps.user_cbufs) { 2089 context->pipe_data.cb_ps_ff.buffer_size = cb.buffer_size; 2090 u_upload_data(device->constbuf_uploader, 2091 0, 2092 cb.buffer_size, 2093 device->constbuf_alignment, 2094 cb.user_buffer, 2095 &context->pipe_data.cb_ps_ff.buffer_offset, 2096 &context->pipe_data.cb_ps_ff.buffer); 2097 u_upload_unmap(device->constbuf_uploader); 2098 context->pipe_data.cb_ps_ff.user_buffer = NULL; 2099 } else 2100 context->pipe_data.cb_ps_ff = cb; 2101 context->commit |= NINE_STATE_COMMIT_CONST_PS; 2102 } 2103 2104 context->changed.group &= ~NINE_STATE_FF; 2105 } 2106 2107 2108 boolean 2109 nine_ff_init(struct NineDevice9 *device) 2110 { 2111 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash, 2112 nine_ff_vs_key_comp); 2113 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash, 2114 nine_ff_ps_key_comp); 2115 2116 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash, 2117 nine_ff_fvf_key_comp); 2118 2119 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float)); 2120 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float)); 2121 2122 return device->ff.ht_vs && device->ff.ht_ps && 2123 device->ff.ht_fvf && 2124 device->ff.vs_const && device->ff.ps_const; 2125 } 2126 2127 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data) 2128 { 2129 NineUnknown_Unbind(NineUnknown(value)); 2130 return PIPE_OK; 2131 } 2132 2133 void 2134 nine_ff_fini(struct NineDevice9 *device) 2135 { 2136 if (device->ff.ht_vs) { 2137 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2138 util_hash_table_destroy(device->ff.ht_vs); 2139 } 2140 if (device->ff.ht_ps) { 2141 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2142 util_hash_table_destroy(device->ff.ht_ps); 2143 } 2144 if (device->ff.ht_fvf) { 2145 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL); 2146 util_hash_table_destroy(device->ff.ht_fvf); 
2147 } 2148 device->ff.vs = NULL; /* destroyed by unbinding from hash table */ 2149 device->ff.ps = NULL; 2150 2151 FREE(device->ff.vs_const); 2152 FREE(device->ff.ps_const); 2153 } 2154 2155 static void 2156 nine_ff_prune_vs(struct NineDevice9 *device) 2157 { 2158 struct nine_context *context = &device->context; 2159 2160 if (device->ff.num_vs > 100) { 2161 /* could destroy the bound one here, so unbind */ 2162 context->pipe->bind_vs_state(context->pipe, NULL); 2163 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2164 util_hash_table_clear(device->ff.ht_vs); 2165 device->ff.num_vs = 0; 2166 context->changed.group |= NINE_STATE_VS; 2167 } 2168 } 2169 static void 2170 nine_ff_prune_ps(struct NineDevice9 *device) 2171 { 2172 struct nine_context *context = &device->context; 2173 2174 if (device->ff.num_ps > 100) { 2175 /* could destroy the bound one here, so unbind */ 2176 context->pipe->bind_fs_state(context->pipe, NULL); 2177 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2178 util_hash_table_clear(device->ff.ht_ps); 2179 device->ff.num_ps = 0; 2180 context->changed.group |= NINE_STATE_PS; 2181 } 2182 } 2183 2184 /* ========================================================================== */ 2185 2186 /* Matrix multiplication: 2187 * 2188 * in memory: 0 1 2 3 (row major) 2189 * 4 5 6 7 2190 * 8 9 a b 2191 * c d e f 2192 * 2193 * cA cB cC cD 2194 * r0 = (r0 * cA) (r0 * cB) . . 2195 * r1 = (r1 * cA) (r1 * cB) 2196 * r2 = (r2 * cA) . 2197 * r3 = (r3 * cA) . 
2198 * 2199 * r: (11) (12) (13) (14) 2200 * (21) (22) (23) (24) 2201 * (31) (32) (33) (34) 2202 * (41) (42) (43) (44) 2203 * l: (11 12 13 14) 2204 * (21 22 23 24) 2205 * (31 32 33 34) 2206 * (41 42 43 44) 2207 * 2208 * v: (x y z 1 ) 2209 * 2210 * t.xyzw = MUL(v.xxxx, r[0]); 2211 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw); 2212 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw); 2213 * v.xyzw = MAD(v.wwww, r[3], t.xyzw); 2214 * 2215 * v.x = DP4(v, c[0]); 2216 * v.y = DP4(v, c[1]); 2217 * v.z = DP4(v, c[2]); 2218 * v.w = DP4(v, c[3]) = 1 2219 */ 2220 2221 /* 2222 static void 2223 nine_D3DMATRIX_print(const D3DMATRIX *M) 2224 { 2225 DBG("\n(%f %f %f %f)\n" 2226 "(%f %f %f %f)\n" 2227 "(%f %f %f %f)\n" 2228 "(%f %f %f %f)\n", 2229 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3], 2230 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3], 2231 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3], 2232 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]); 2233 } 2234 */ 2235 2236 static inline float 2237 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c) 2238 { 2239 return A->m[r][0] * B->m[0][c] + 2240 A->m[r][1] * B->m[1][c] + 2241 A->m[r][2] * B->m[2][c] + 2242 A->m[r][3] * B->m[3][c]; 2243 } 2244 2245 static inline float 2246 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2247 { 2248 return v->x * M->m[0][c] + 2249 v->y * M->m[1][c] + 2250 v->z * M->m[2][c] + 2251 1.0f * M->m[3][c]; 2252 } 2253 2254 static inline float 2255 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2256 { 2257 return v->x * M->m[0][c] + 2258 v->y * M->m[1][c] + 2259 v->z * M->m[2][c]; 2260 } 2261 2262 void 2263 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R) 2264 { 2265 D->_11 = nine_DP4_row_col(L, 0, R, 0); 2266 D->_12 = nine_DP4_row_col(L, 0, R, 1); 2267 D->_13 = nine_DP4_row_col(L, 0, R, 2); 2268 D->_14 = nine_DP4_row_col(L, 0, R, 3); 2269 2270 D->_21 = nine_DP4_row_col(L, 1, R, 0); 2271 D->_22 = nine_DP4_row_col(L, 1, R, 1); 2272 D->_23 = 
nine_DP4_row_col(L, 1, R, 2); 2273 D->_24 = nine_DP4_row_col(L, 1, R, 3); 2274 2275 D->_31 = nine_DP4_row_col(L, 2, R, 0); 2276 D->_32 = nine_DP4_row_col(L, 2, R, 1); 2277 D->_33 = nine_DP4_row_col(L, 2, R, 2); 2278 D->_34 = nine_DP4_row_col(L, 2, R, 3); 2279 2280 D->_41 = nine_DP4_row_col(L, 3, R, 0); 2281 D->_42 = nine_DP4_row_col(L, 3, R, 1); 2282 D->_43 = nine_DP4_row_col(L, 3, R, 2); 2283 D->_44 = nine_DP4_row_col(L, 3, R, 3); 2284 } 2285 2286 void 2287 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2288 { 2289 d->x = nine_DP4_vec_col(v, M, 0); 2290 d->y = nine_DP4_vec_col(v, M, 1); 2291 d->z = nine_DP4_vec_col(v, M, 2); 2292 } 2293 2294 void 2295 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2296 { 2297 d->x = nine_DP3_vec_col(v, M, 0); 2298 d->y = nine_DP3_vec_col(v, M, 1); 2299 d->z = nine_DP3_vec_col(v, M, 2); 2300 } 2301 2302 void 2303 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M) 2304 { 2305 unsigned i, j; 2306 for (i = 0; i < 4; ++i) 2307 for (j = 0; j < 4; ++j) 2308 D->m[i][j] = M->m[j][i]; 2309 } 2310 2311 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2312 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2313 if (t > 0.0f) pos += t; else neg += t; } while(0) 2314 2315 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2316 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2317 if (t > 0.0f) neg -= t; else pos -= t; } while(0) 2318 float 2319 nine_d3d_matrix_det(const D3DMATRIX *M) 2320 { 2321 float pos = 0.0f; 2322 float neg = 0.0f; 2323 2324 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4); 2325 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2); 2326 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3); 2327 2328 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3); 2329 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4); 2330 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1); 2331 2332 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4); 2333 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1); 2334 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2); 2335 2336 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 
3, 2); 2337 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3); 2338 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1); 2339 2340 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3); 2341 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4); 2342 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2); 2343 2344 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4); 2345 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1); 2346 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3); 2347 2348 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2); 2349 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4); 2350 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1); 2351 2352 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3); 2353 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1); 2354 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2); 2355 2356 return pos + neg; 2357 } 2358 2359 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because 2360 * I have no idea where this code came from. 2361 */ 2362 void 2363 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) 2364 { 2365 int i, k; 2366 float det; 2367 2368 D->m[0][0] = 2369 M->m[1][1] * M->m[2][2] * M->m[3][3] - 2370 M->m[1][1] * M->m[3][2] * M->m[2][3] - 2371 M->m[1][2] * M->m[2][1] * M->m[3][3] + 2372 M->m[1][2] * M->m[3][1] * M->m[2][3] + 2373 M->m[1][3] * M->m[2][1] * M->m[3][2] - 2374 M->m[1][3] * M->m[3][1] * M->m[2][2]; 2375 2376 D->m[0][1] = 2377 -M->m[0][1] * M->m[2][2] * M->m[3][3] + 2378 M->m[0][1] * M->m[3][2] * M->m[2][3] + 2379 M->m[0][2] * M->m[2][1] * M->m[3][3] - 2380 M->m[0][2] * M->m[3][1] * M->m[2][3] - 2381 M->m[0][3] * M->m[2][1] * M->m[3][2] + 2382 M->m[0][3] * M->m[3][1] * M->m[2][2]; 2383 2384 D->m[0][2] = 2385 M->m[0][1] * M->m[1][2] * M->m[3][3] - 2386 M->m[0][1] * M->m[3][2] * M->m[1][3] - 2387 M->m[0][2] * M->m[1][1] * M->m[3][3] + 2388 M->m[0][2] * M->m[3][1] * M->m[1][3] + 2389 M->m[0][3] * M->m[1][1] * M->m[3][2] - 2390 M->m[0][3] * M->m[3][1] * M->m[1][2]; 2391 2392 D->m[0][3] = 2393 -M->m[0][1] * M->m[1][2] * M->m[2][3] + 2394 M->m[0][1] * M->m[2][2] * M->m[1][3] + 2395 M->m[0][2] * M->m[1][1] * M->m[2][3] - 2396 M->m[0][2] * M->m[2][1] * M->m[1][3] - 2397 M->m[0][3] * M->m[1][1] * M->m[2][2] + 
2398 M->m[0][3] * M->m[2][1] * M->m[1][2]; 2399 2400 D->m[1][0] = 2401 -M->m[1][0] * M->m[2][2] * M->m[3][3] + 2402 M->m[1][0] * M->m[3][2] * M->m[2][3] + 2403 M->m[1][2] * M->m[2][0] * M->m[3][3] - 2404 M->m[1][2] * M->m[3][0] * M->m[2][3] - 2405 M->m[1][3] * M->m[2][0] * M->m[3][2] + 2406 M->m[1][3] * M->m[3][0] * M->m[2][2]; 2407 2408 D->m[1][1] = 2409 M->m[0][0] * M->m[2][2] * M->m[3][3] - 2410 M->m[0][0] * M->m[3][2] * M->m[2][3] - 2411 M->m[0][2] * M->m[2][0] * M->m[3][3] + 2412 M->m[0][2] * M->m[3][0] * M->m[2][3] + 2413 M->m[0][3] * M->m[2][0] * M->m[3][2] - 2414 M->m[0][3] * M->m[3][0] * M->m[2][2]; 2415 2416 D->m[1][2] = 2417 -M->m[0][0] * M->m[1][2] * M->m[3][3] + 2418 M->m[0][0] * M->m[3][2] * M->m[1][3] + 2419 M->m[0][2] * M->m[1][0] * M->m[3][3] - 2420 M->m[0][2] * M->m[3][0] * M->m[1][3] - 2421 M->m[0][3] * M->m[1][0] * M->m[3][2] + 2422 M->m[0][3] * M->m[3][0] * M->m[1][2]; 2423 2424 D->m[1][3] = 2425 M->m[0][0] * M->m[1][2] * M->m[2][3] - 2426 M->m[0][0] * M->m[2][2] * M->m[1][3] - 2427 M->m[0][2] * M->m[1][0] * M->m[2][3] + 2428 M->m[0][2] * M->m[2][0] * M->m[1][3] + 2429 M->m[0][3] * M->m[1][0] * M->m[2][2] - 2430 M->m[0][3] * M->m[2][0] * M->m[1][2]; 2431 2432 D->m[2][0] = 2433 M->m[1][0] * M->m[2][1] * M->m[3][3] - 2434 M->m[1][0] * M->m[3][1] * M->m[2][3] - 2435 M->m[1][1] * M->m[2][0] * M->m[3][3] + 2436 M->m[1][1] * M->m[3][0] * M->m[2][3] + 2437 M->m[1][3] * M->m[2][0] * M->m[3][1] - 2438 M->m[1][3] * M->m[3][0] * M->m[2][1]; 2439 2440 D->m[2][1] = 2441 -M->m[0][0] * M->m[2][1] * M->m[3][3] + 2442 M->m[0][0] * M->m[3][1] * M->m[2][3] + 2443 M->m[0][1] * M->m[2][0] * M->m[3][3] - 2444 M->m[0][1] * M->m[3][0] * M->m[2][3] - 2445 M->m[0][3] * M->m[2][0] * M->m[3][1] + 2446 M->m[0][3] * M->m[3][0] * M->m[2][1]; 2447 2448 D->m[2][2] = 2449 M->m[0][0] * M->m[1][1] * M->m[3][3] - 2450 M->m[0][0] * M->m[3][1] * M->m[1][3] - 2451 M->m[0][1] * M->m[1][0] * M->m[3][3] + 2452 M->m[0][1] * M->m[3][0] * M->m[1][3] + 2453 M->m[0][3] * M->m[1][0] * 
M->m[3][1] - 2454 M->m[0][3] * M->m[3][0] * M->m[1][1]; 2455 2456 D->m[2][3] = 2457 -M->m[0][0] * M->m[1][1] * M->m[2][3] + 2458 M->m[0][0] * M->m[2][1] * M->m[1][3] + 2459 M->m[0][1] * M->m[1][0] * M->m[2][3] - 2460 M->m[0][1] * M->m[2][0] * M->m[1][3] - 2461 M->m[0][3] * M->m[1][0] * M->m[2][1] + 2462 M->m[0][3] * M->m[2][0] * M->m[1][1]; 2463 2464 D->m[3][0] = 2465 -M->m[1][0] * M->m[2][1] * M->m[3][2] + 2466 M->m[1][0] * M->m[3][1] * M->m[2][2] + 2467 M->m[1][1] * M->m[2][0] * M->m[3][2] - 2468 M->m[1][1] * M->m[3][0] * M->m[2][2] - 2469 M->m[1][2] * M->m[2][0] * M->m[3][1] + 2470 M->m[1][2] * M->m[3][0] * M->m[2][1]; 2471 2472 D->m[3][1] = 2473 M->m[0][0] * M->m[2][1] * M->m[3][2] - 2474 M->m[0][0] * M->m[3][1] * M->m[2][2] - 2475 M->m[0][1] * M->m[2][0] * M->m[3][2] + 2476 M->m[0][1] * M->m[3][0] * M->m[2][2] + 2477 M->m[0][2] * M->m[2][0] * M->m[3][1] - 2478 M->m[0][2] * M->m[3][0] * M->m[2][1]; 2479 2480 D->m[3][2] = 2481 -M->m[0][0] * M->m[1][1] * M->m[3][2] + 2482 M->m[0][0] * M->m[3][1] * M->m[1][2] + 2483 M->m[0][1] * M->m[1][0] * M->m[3][2] - 2484 M->m[0][1] * M->m[3][0] * M->m[1][2] - 2485 M->m[0][2] * M->m[1][0] * M->m[3][1] + 2486 M->m[0][2] * M->m[3][0] * M->m[1][1]; 2487 2488 D->m[3][3] = 2489 M->m[0][0] * M->m[1][1] * M->m[2][2] - 2490 M->m[0][0] * M->m[2][1] * M->m[1][2] - 2491 M->m[0][1] * M->m[1][0] * M->m[2][2] + 2492 M->m[0][1] * M->m[2][0] * M->m[1][2] + 2493 M->m[0][2] * M->m[1][0] * M->m[2][1] - 2494 M->m[0][2] * M->m[2][0] * M->m[1][1]; 2495 2496 det = 2497 M->m[0][0] * D->m[0][0] + 2498 M->m[1][0] * D->m[0][1] + 2499 M->m[2][0] * D->m[0][2] + 2500 M->m[3][0] * D->m[0][3]; 2501 2502 if (det < 1e-30) {/* non inversible */ 2503 *D = *M; /* wine tests */ 2504 return; 2505 } 2506 2507 det = 1.0 / det; 2508 2509 for (i = 0; i < 4; i++) 2510 for (k = 0; k < 4; k++) 2511 D->m[i][k] *= det; 2512 2513 #ifdef DEBUG 2514 { 2515 D3DMATRIX I; 2516 2517 nine_d3d_matrix_matrix_mul(&I, D, M); 2518 2519 for (i = 0; i < 4; ++i) 2520 for (k = 0; k < 4; 
++k) 2521 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3) 2522 DBG("Matrix inversion check FAILED !\n"); 2523 } 2524 #endif 2525 } 2526