1 2 /* FF is big and ugly so feel free to write lines as long as you like. 3 * Aieeeeeeeee ! 4 * 5 * Let me make that clearer: 6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!! 7 */ 8 9 #include "device9.h" 10 #include "basetexture9.h" 11 #include "vertexdeclaration9.h" 12 #include "vertexshader9.h" 13 #include "pixelshader9.h" 14 #include "nine_ff.h" 15 #include "nine_defines.h" 16 #include "nine_helpers.h" 17 #include "nine_pipe.h" 18 #include "nine_dump.h" 19 20 #include "pipe/p_context.h" 21 #include "tgsi/tgsi_ureg.h" 22 #include "tgsi/tgsi_dump.h" 23 #include "util/u_box.h" 24 #include "util/u_hash_table.h" 25 #include "util/u_upload_mgr.h" 26 27 #define DBG_CHANNEL DBG_FF 28 29 #define NINE_FF_NUM_VS_CONST 196 30 #define NINE_FF_NUM_PS_CONST 24 31 32 struct fvec4 33 { 34 float x, y, z, w; 35 }; 36 37 struct nine_ff_vs_key 38 { 39 union { 40 struct { 41 uint32_t position_t : 1; 42 uint32_t lighting : 1; 43 uint32_t darkness : 1; /* lighting enabled but no active lights */ 44 uint32_t localviewer : 1; 45 uint32_t vertexpointsize : 1; 46 uint32_t pointscale : 1; 47 uint32_t vertexblend : 3; 48 uint32_t vertexblend_indexed : 1; 49 uint32_t vertextween : 1; 50 uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */ 51 uint32_t mtl_ambient : 2; 52 uint32_t mtl_specular : 2; 53 uint32_t mtl_emissive : 2; 54 uint32_t fog_mode : 2; 55 uint32_t fog_range : 1; 56 uint32_t color0in_one : 1; 57 uint32_t color1in_zero : 1; 58 uint32_t has_normal : 1; 59 uint32_t fog : 1; 60 uint32_t normalizenormals : 1; 61 uint32_t ucp : 1; 62 uint32_t pad1 : 4; 63 uint32_t tc_dim_input: 16; /* 8 * 2 bits */ 64 uint32_t pad2 : 16; 65 uint32_t tc_dim_output: 24; /* 8 * 3 bits */ 66 uint32_t pad3 : 8; 67 uint32_t tc_gen : 24; /* 8 * 3 bits */ 68 uint32_t pad4 : 8; 69 uint32_t tc_idx : 24; 70 uint32_t pad5 : 8; 71 uint32_t passthrough; 72 }; 73 uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ 74 uint32_t value32[6]; 75 }; 76 }; 77 78 /* Texture stage state: 79 * 80 * COLOROP D3DTOP 5 bit 81 * ALPHAOP D3DTOP 5 bit 82 * COLORARG0 D3DTA 3 bit 83 * COLORARG1 D3DTA 3 bit 84 * COLORARG2 D3DTA 3 bit 85 * ALPHAARG0 D3DTA 3 bit 86 * ALPHAARG1 D3DTA 3 bit 87 * ALPHAARG2 D3DTA 3 bit 88 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1) 89 * TEXCOORDINDEX 0 - 7 3 bit 90 * =========================== 91 * 32 bit per stage 92 */ 93 struct nine_ff_ps_key 94 { 95 union { 96 struct { 97 struct { 98 uint32_t colorop : 5; 99 uint32_t alphaop : 5; 100 uint32_t colorarg0 : 3; 101 uint32_t colorarg1 : 3; 102 uint32_t colorarg2 : 3; 103 uint32_t alphaarg0 : 3; 104 uint32_t alphaarg1 : 3; 105 uint32_t alphaarg2 : 3; 106 uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ 107 uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ 108 uint32_t pad : 1; 109 /* that's 32 bit exactly */ 110 } ts[8]; 111 uint32_t projected : 16; 112 uint32_t fog : 1; /* for vFog coming from VS */ 113 uint32_t fog_mode : 2; 114 uint32_t fog_source : 1; /* 0: Z, 1: W */ 115 uint32_t specular : 1; 116 uint32_t pad1 : 11; /* 9 32-bit words with this */ 117 uint8_t colorarg_b4[3]; 118 uint8_t colorarg_b5[3]; 119 uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ 120 uint8_t pad2[3]; 121 }; 122 uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ 123 uint32_t value32[12]; 124 }; 125 }; 126 127 static unsigned nine_ff_vs_key_hash(void *key) 128 { 129 struct nine_ff_vs_key *vs = key; 130 unsigned i; 131 uint32_t hash = vs->value32[0]; 132 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i) 133 hash ^= vs->value32[i]; 134 return hash; 135 } 136 static int nine_ff_vs_key_comp(void *key1, void *key2) 137 { 138 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1; 139 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2; 140 141 return memcmp(a->value64, b->value64, sizeof(a->value64)); 142 } 143 static unsigned nine_ff_ps_key_hash(void *key) 144 { 145 struct nine_ff_ps_key *ps = key; 146 unsigned i; 147 uint32_t hash = ps->value32[0]; 148 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i) 149 hash ^= ps->value32[i]; 150 return hash; 151 } 152 static int nine_ff_ps_key_comp(void *key1, void *key2) 153 { 154 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1; 155 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2; 156 157 return memcmp(a->value64, b->value64, sizeof(a->value64)); 158 } 159 static unsigned nine_ff_fvf_key_hash(void *key) 160 { 161 return *(DWORD *)key; 162 } 163 static int nine_ff_fvf_key_comp(void *key1, void *key2) 164 { 165 return *(DWORD *)key1 != *(DWORD *)key2; 166 } 167 168 static void nine_ff_prune_vs(struct NineDevice9 *); 169 static void nine_ff_prune_ps(struct NineDevice9 *); 170 171 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) 172 { 173 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { 174 const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL); 175 tgsi_dump(toks, 0); 176 ureg_free_tokens(toks); 177 } 178 } 179 180 #define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X) 181 #define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y) 182 #define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z) 183 #define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W) 184 185 #define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X) 186 #define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y) 187 #define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z) 188 #define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W) 189 190 #define _XYZW(r) (r) 191 192 /* AL should contain base address of lights table. */ 193 #define LIGHT_CONST(i) \ 194 ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL)) 195 196 #define MATERIAL_CONST(i) \ 197 ureg_DECL_constant(ureg, 19 + (i)) 198 199 #define _CONST(n) ureg_DECL_constant(ureg, n) 200 201 /* VS FF constants layout: 202 * 203 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION 204 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW 205 * CONST[ 8..11] D3DTS_PROJECTION 206 * CONST[12..15] D3DTS_VIEW^(-1) 207 * CONST[16..18] Normal matrix 208 * 209 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient 210 * CONST[20] MATERIAL.Diffuse 211 * CONST[21] MATERIAL.Ambient 212 * CONST[22] MATERIAL.Specular 213 * CONST[23].x___ MATERIAL.Power 214 * CONST[24] MATERIAL.Emissive 215 * CONST[25] RS.Ambient 216 * 217 * CONST[26].x___ RS.PointSizeMin 218 * CONST[26]._y__ RS.PointSizeMax 219 * CONST[26].__z_ RS.PointSize 220 * CONST[26].___w RS.PointScaleA 221 * CONST[27].x___ RS.PointScaleB 222 * CONST[27]._y__ RS.PointScaleC 223 * 224 * CONST[28].x___ RS.FogEnd 225 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 226 * CONST[28].__z_ RS.FogDensity 227 228 * CONST[30].x___ TWEENFACTOR 229 * 230 * CONST[32].x___ LIGHT[0].Type 231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2 232 * CONST[33] LIGHT[0].Diffuse 233 * CONST[34] LIGHT[0].Specular 234 * CONST[35] LIGHT[0].Ambient 235 * CONST[36].xyz_ LIGHT[0].Position 236 * CONST[36].___w LIGHT[0].Range 237 * CONST[37].xyz_ LIGHT[0].Direction 238 * CONST[37].___w LIGHT[0].Falloff 239 * CONST[38].x___ cos(LIGHT[0].Theta / 2) 240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2) 241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2)) 242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights) 243 * CONST[39].___w 1 if this is the last active light, 0 if not 244 * CONST[40] LIGHT[1] 245 * CONST[48] LIGHT[2] 246 * CONST[56] LIGHT[3] 247 * CONST[64] LIGHT[4] 248 * CONST[72] LIGHT[5] 249 * CONST[80] LIGHT[6] 250 * CONST[88] LIGHT[7] 251 * NOTE: no lighting code is generated if there are no active lights 252 * 253 * CONST[100].x___ Viewport 2/width 254 * CONST[100]._y__ Viewport 2/height 255 * CONST[100].__z_ Viewport 1/(zmax - zmin) 256 * CONST[100].___w Viewport width 257 * CONST[101].x___ Viewport x0 258 * CONST[101]._y__ Viewport y0 259 * CONST[101].__z_ Viewport z0 260 * 261 * CONST[128..131] D3DTS_TEXTURE0 262 * CONST[132..135] D3DTS_TEXTURE1 263 * CONST[136..139] D3DTS_TEXTURE2 264 * CONST[140..143] D3DTS_TEXTURE3 265 * CONST[144..147] D3DTS_TEXTURE4 266 * CONST[148..151] D3DTS_TEXTURE5 267 * CONST[152..155] D3DTS_TEXTURE6 268 * CONST[156..159] D3DTS_TEXTURE7 269 * 270 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW 271 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW 272 * ... 273 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW 274 */ 275 struct vs_build_ctx 276 { 277 struct ureg_program *ureg; 278 const struct nine_ff_vs_key *key; 279 280 uint16_t input[PIPE_MAX_ATTRIBS]; 281 unsigned num_inputs; 282 283 struct ureg_src aVtx; 284 struct ureg_src aNrm; 285 struct ureg_src aCol[2]; 286 struct ureg_src aTex[8]; 287 struct ureg_src aPsz; 288 struct ureg_src aInd; 289 struct ureg_src aWgt; 290 291 struct ureg_src aVtx1; /* tweening */ 292 struct ureg_src aNrm1; 293 294 struct ureg_src mtlA; 295 struct ureg_src mtlD; 296 struct ureg_src mtlS; 297 struct ureg_src mtlE; 298 }; 299 300 static inline unsigned 301 get_texcoord_sn(struct pipe_screen *screen) 302 { 303 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD)) 304 return TGSI_SEMANTIC_TEXCOORD; 305 return TGSI_SEMANTIC_GENERIC; 306 } 307 308 static inline struct ureg_src 309 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) 310 { 311 const unsigned i = vs->num_inputs++; 312 assert(i < PIPE_MAX_ATTRIBS); 313 vs->input[i] = ndecl; 314 return ureg_DECL_vs_input(vs->ureg, i); 315 } 316 317 /* NOTE: dst may alias src */ 318 static inline void 319 ureg_normalize3(struct ureg_program *ureg, 320 struct ureg_dst dst, struct ureg_src src) 321 { 322 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 323 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 324 325 ureg_DP3(ureg, tmp_x, src, src); 326 ureg_RSQ(ureg, tmp_x, _X(tmp)); 327 ureg_MUL(ureg, dst, src, _X(tmp)); 328 ureg_release_temporary(ureg, tmp); 329 } 330 331 static void * 332 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) 333 { 334 const struct nine_ff_vs_key *key = vs->key; 335 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); 336 struct ureg_dst oPos, oCol[2], oPsz, oFog; 337 struct ureg_dst AR; 338 unsigned i, c; 339 unsigned label[32], l = 0; 340 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); 341 boolean has_aNrm = need_aNrm && key->has_normal; 342 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp; 343 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 344 345 vs->ureg = ureg; 346 347 /* Check which inputs we should transform. */ 348 for (i = 0; i < 8 * 3; i += 3) { 349 switch ((key->tc_gen >> i) & 0x7) { 350 case NINED3DTSS_TCI_CAMERASPACENORMAL: 351 need_aNrm = TRUE; 352 break; 353 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 354 need_aVtx = TRUE; 355 break; 356 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 357 need_aVtx = need_aNrm = TRUE; 358 break; 359 case NINED3DTSS_TCI_SPHEREMAP: 360 need_aVtx = need_aNrm = TRUE; 361 break; 362 default: 363 break; 364 } 365 } 366 367 /* Declare and record used inputs (needed for linkage with vertex format): 368 * (texture coordinates handled later) 369 */ 370 vs->aVtx = build_vs_add_input(vs, 371 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); 372 373 vs->aNrm = ureg_imm1f(ureg, 0.0f); 374 if (has_aNrm) 375 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); 376 377 vs->aCol[0] = ureg_imm1f(ureg, 1.0f); 378 vs->aCol[1] = ureg_imm1f(ureg, 0.0f); 379 380 if (key->lighting || key->darkness) { 381 const unsigned mask = key->mtl_diffuse | key->mtl_specular | 382 key->mtl_ambient | key->mtl_emissive; 383 if ((mask & 0x1) && !key->color0in_one) 384 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 385 if ((mask & 0x2) && !key->color1in_zero) 386 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 387 388 vs->mtlD = MATERIAL_CONST(1); 389 vs->mtlA = MATERIAL_CONST(2); 390 vs->mtlS = MATERIAL_CONST(3); 391 vs->mtlE = MATERIAL_CONST(5); 392 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else 393 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1]; 394 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else 395 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1]; 396 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else 397 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1]; 398 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else 399 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1]; 400 } else { 401 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 402 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 403 } 404 405 if (key->vertexpointsize) 406 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); 407 408 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) 409 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); 410 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) 411 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); 412 if (key->vertextween) { 413 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); 414 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1)); 415 } 416 417 /* Declare outputs: 418 */ 419 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ 420 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); 421 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); 422 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 423 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0); 424 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); 425 } 426 427 if (key->vertexpointsize || key->pointscale) { 428 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, 429 TGSI_WRITEMASK_X, 0, 1); 430 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); 431 } 432 433 if (key->lighting || key->vertexblend) 434 AR = ureg_DECL_address(ureg); 435 436 /* === Vertex transformation / vertex blending: 437 */ 438 439 if (key->position_t) { 440 if (device->driver_caps.window_space_position_support) { 441 ureg_MOV(ureg, oPos, vs->aVtx); 442 } else { 443 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 444 /* vs->aVtx contains the coordinates buffer wise. 445 * later in the pipeline, clipping, viewport and division 446 * by w (rhw = 1/w) are going to be applied, so do the reverse 447 * of these transformations (except clipping) to have the good 448 * position at the end.*/ 449 ureg_MOV(ureg, tmp, vs->aVtx); 450 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */ 451 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); 452 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); 453 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 454 /* Y needs to be reversed */ 455 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); 456 /* inverse rhw */ 457 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp)); 458 /* multiply X, Y, Z by w */ 459 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); 460 ureg_MOV(ureg, oPos, ureg_src(tmp)); 461 ureg_release_temporary(ureg, tmp); 462 } 463 } else if (key->vertexblend) { 464 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 465 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 466 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 467 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 468 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); 469 struct ureg_src cWM[4]; 470 471 for (i = 160; i <= 195; ++i) 472 ureg_DECL_constant(ureg, i); 473 474 /* translate world matrix index to constant file index */ 475 if (key->vertexblend_indexed) { 476 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f)); 477 ureg_ARL(ureg, AR, ureg_src(tmp)); 478 } 479 480 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 481 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 482 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); 483 484 for (i = 0; i < key->vertexblend; ++i) { 485 for (c = 0; c < 4; ++c) { 486 cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0); 487 if (key->vertexblend_indexed) 488 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); 489 } 490 491 /* multiply by WORLD(index) */ 492 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); 493 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); 494 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); 495 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); 496 497 if (has_aNrm) { 498 /* Note: the spec says the transpose of the inverse of the 499 * WorldView matrices should be used, but all tests show 500 * otherwise. 501 * Only case unknown: D3DVBF_0WEIGHTS */ 502 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]); 503 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2)); 504 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2)); 505 } 506 507 if (i < (key->vertexblend - 1)) { 508 /* accumulate weighted position value */ 509 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); 510 if (has_aNrm) 511 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); 512 /* subtract weighted position value for last value */ 513 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); 514 } 515 } 516 517 /* the last weighted position is always 1 - sum_of_previous_weights */ 518 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); 519 if (has_aNrm) 520 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst)); 521 522 /* multiply by VIEW_PROJ */ 523 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); 524 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); 525 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); 526 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); 527 528 if (need_aVtx) 529 vs->aVtx = ureg_src(aVtx_dst); 530 531 ureg_release_temporary(ureg, tmp); 532 ureg_release_temporary(ureg, tmp2); 533 ureg_release_temporary(ureg, sum_blendweights); 534 if (!need_aVtx) 535 ureg_release_temporary(ureg, aVtx_dst); 536 537 if (has_aNrm) { 538 if (key->normalizenormals) 539 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 540 vs->aNrm = ureg_src(aNrm_dst); 541 } else 542 ureg_release_temporary(ureg, aNrm_dst); 543 } else { 544 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 545 546 if (key->vertextween) { 547 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 548 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx); 549 vs->aVtx = ureg_src(aVtx_dst); 550 if (has_aNrm) { 551 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 552 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm); 553 vs->aNrm = ureg_src(aNrm_dst); 554 } 555 } 556 557 /* position = vertex * WORLD_VIEW_PROJ */ 558 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); 559 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); 560 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); 561 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); 562 ureg_release_temporary(ureg, tmp); 563 564 if (need_aVtx) { 565 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 566 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); 567 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); 568 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); 569 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); 570 vs->aVtx = ureg_src(aVtx_dst); 571 } 572 if (has_aNrm) { 573 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 574 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); 575 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); 576 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst)); 577 if (key->normalizenormals) 578 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 579 vs->aNrm = ureg_src(aNrm_dst); 580 } 581 } 582 583 /* === Process point size: 584 */ 585 if (key->vertexpointsize || key->pointscale) { 586 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 587 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 588 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 589 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 590 if (key->vertexpointsize) { 591 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 592 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); 593 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); 594 } else { 595 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 596 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); 597 } 598 599 if (key->pointscale) { 600 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 601 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); 602 603 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 604 ureg_RSQ(ureg, tmp_y, _X(tmp)); 605 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); 606 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); 607 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); 608 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); 609 ureg_RSQ(ureg, tmp_x, _X(tmp)); 610 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); 611 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); 612 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); 613 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); 614 } 615 616 ureg_MOV(ureg, oPsz, _Z(tmp)); 617 ureg_release_temporary(ureg, tmp); 618 } 619 620 for (i = 0; i < 8; ++i) { 621 struct ureg_dst tmp, tmp_x, tmp2; 622 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed; 623 unsigned c, writemask; 624 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; 625 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; 626 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); 627 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; 628 629 /* No texture output of index s */ 630 if (tci == NINED3DTSS_TCI_DISABLE) 631 continue; 632 oTex = ureg_DECL_output(ureg, texcoord_sn, i); 633 tmp = ureg_DECL_temporary(ureg); 634 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 635 input_coord = ureg_DECL_temporary(ureg); 636 transformed = ureg_DECL_temporary(ureg); 637 638 /* Get the coordinate */ 639 switch (tci) { 640 case NINED3DTSS_TCI_PASSTHRU: 641 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * 642 * Else the idx is used only to determine wrapping mode. */ 643 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); 644 ureg_MOV(ureg, input_coord, vs->aTex[idx]); 645 break; 646 case NINED3DTSS_TCI_CAMERASPACENORMAL: 647 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); 648 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 649 dim_input = 4; 650 break; 651 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 652 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 653 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 654 dim_input = 4; 655 break; 656 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 657 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 658 aVtx_normed = ureg_DECL_temporary(ureg); 659 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 660 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 661 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 662 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 663 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 664 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 665 ureg_release_temporary(ureg, aVtx_normed); 666 dim_input = 4; 667 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 668 break; 669 case NINED3DTSS_TCI_SPHEREMAP: 670 /* Implement the formula of GL_SPHERE_MAP */ 671 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 672 aVtx_normed = ureg_DECL_temporary(ureg); 673 tmp2 = ureg_DECL_temporary(ureg); 674 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 675 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 676 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 677 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 678 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 679 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ 680 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); 681 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); 682 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2)); 683 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2)); 684 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f)); 685 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2) 686 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */ 687 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2)); 688 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); 689 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 690 ureg_release_temporary(ureg, aVtx_normed); 691 ureg_release_temporary(ureg, tmp2); 692 dim_input = 4; 693 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 694 break; 695 default: 696 assert(0); 697 break; 698 } 699 700 /* Apply the transformation */ 701 /* dim_output == 0 => do not transform the components. 702 * XYZRHW also disables transformation */ 703 if (!dim_output || key->position_t) { 704 ureg_release_temporary(ureg, transformed); 705 transformed = input_coord; 706 writemask = TGSI_WRITEMASK_XYZW; 707 } else { 708 for (c = 0; c < dim_output; c++) { 709 t = ureg_writemask(transformed, 1 << c); 710 switch (dim_input) { 711 /* dim_input = 1 2 3: -> we add trailing 1 to input*/ 712 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); 713 break; 714 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 715 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); 716 break; 717 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 718 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); 719 break; 720 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; 721 default: 722 assert(0); 723 } 724 } 725 writemask = (1 << dim_output) - 1; 726 ureg_release_temporary(ureg, input_coord); 727 } 728 729 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); 730 ureg_release_temporary(ureg, transformed); 731 ureg_release_temporary(ureg, tmp); 732 } 733 734 /* === Lighting: 735 * 736 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation. 737 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation. 738 * SPOT: Finite distance, divergent rays, angular dependence, attenuation. 739 * 740 * vec3 normal = normalize(in.Normal * NormalMatrix); 741 * vec3 hitDir = light.direction; 742 * float atten = 1.0; 743 * 744 * if (light.type != DIRECTIONAL) 745 * { 746 * vec3 hitVec = light.position - eyeVertex; 747 * float d = length(hitVec); 748 * hitDir = hitVec / d; 749 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0); 750 * } 751 * 752 * if (light.type == SPOTLIGHT) 753 * { 754 * float rho = dp3(-hitVec, light.direction); 755 * if (rho < cos(light.phi / 2)) 756 * atten = 0; 757 * if (rho < cos(light.theta / 2)) 758 * atten *= pow(some_func(rho), light.falloff); 759 * } 760 * 761 * float nDotHit = dp3_sat(normal, hitVec); 762 * float powFact = 0.0; 763 * 764 * if (nDotHit > 0.0) 765 * { 766 * vec3 midVec = normalize(hitDir + eye); 767 * float nDotMid = dp3_sat(normal, midVec); 768 * pFact = pow(nDotMid, material.power); 769 * } 770 * 771 * ambient += light.ambient * atten; 772 * diffuse += light.diffuse * atten * nDotHit; 773 * specular += light.specular * atten * powFact; 774 */ 775 if (key->lighting) { 776 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 777 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 778 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 779 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 780 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 781 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 782 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 783 784 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 785 786 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); 787 788 /* Light.*.Alpha is not used. */ 789 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 790 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 791 struct ureg_dst rS = ureg_DECL_temporary(ureg); 792 793 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); 794 795 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0)); 796 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0)); 797 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0)); 798 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0)); 799 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1)); 800 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2)); 801 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3)); 802 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4)); 803 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4)); 804 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5)); 805 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5)); 806 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6)); 807 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6)); 808 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6)); 809 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7)); 810 811 const unsigned loop_label = l++; 812 813 /* Declare all light constants to allow indirect adressing */ 814 for (i = 32; i < 96; i++) 815 ureg_DECL_constant(ureg, i); 816 817 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ 818 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); 819 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); 820 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f)); 821 822 /* loop management */ 823 ureg_BGNLOOP(ureg, &label[loop_label]); 824 ureg_ARL(ureg, AL, _W(rCtr)); 825 826 /* if (not DIRECTIONAL light): */ 827 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL)); 828 ureg_MOV(ureg, rHit, ureg_negate(cLDir)); 829 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f)); 830 ureg_IF(ureg, _X(tmp), &label[l++]); 831 { 832 /* hitDir = light.position - eyeVtx 833 * d = length(hitDir) 834 */ 835 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); 836 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); 837 ureg_RSQ(ureg, tmp_y, _X(tmp)); 838 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ 839 840 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */ 841 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1); 842 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0); 843 ureg_RCP(ureg, rAtt, _W(rAtt)); 844 /* cut-off if distance exceeds Light.Range */ 845 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng); 846 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 847 } 848 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 849 ureg_ENDIF(ureg); 850 851 /* normalize hitDir */ 852 ureg_normalize3(ureg, rHit, ureg_src(rHit)); 853 854 /* if (SPOT light) */ 855 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); 856 ureg_IF(ureg, _X(tmp), &label[l++]); 857 { 858 /* rho = dp3(-hitDir, light.spotDir) 859 * 860 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi 861 * spotAtt = 1 862 * else 863 * if (rho <= light.cphi2) 864 * spotAtt = 0 865 * else 866 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff 867 */ 868 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ 869 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); 870 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); 871 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ 872 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ 873 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */ 874 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp)); 875 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 876 } 877 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 878 ureg_ENDIF(ureg); 879 880 /* directional factors, let's not use LIT because of clarity */ 881 882 if (has_aNrm) { 883 if (key->localviewer) { 884 ureg_normalize3(ureg, rMid, vs->aVtx); 885 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); 886 } else { 887 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); 888 } 889 ureg_normalize3(ureg, rMid, ureg_src(rMid)); 890 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); 891 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 892 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp)); 893 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0). 894 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0). 895 * No tests were made for backfacing, so add the two conditions */ 896 ureg_IF(ureg, _Z(tmp), &label[l++]); 897 { 898 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 899 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); 900 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */ 901 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */ 902 } 903 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 904 ureg_ENDIF(ureg); 905 906 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ 907 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */ 908 } 909 910 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */ 911 912 /* break if this was the last light */ 913 ureg_IF(ureg, cLLast, &label[l++]); 914 ureg_BRK(ureg); 915 ureg_ENDIF(ureg); 916 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 917 918 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f)); 919 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg)); 920 ureg_ENDLOOP(ureg, &label[loop_label]); 921 922 /* Apply to material: 923 * 924 * oCol[0] = (material.emissive + material.ambient * rs.ambient) + 925 * material.ambient * ambient + 926 * material.diffuse * diffuse + 927 * oCol[1] = material.specular * specular; 928 */ 929 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 930 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19)); 931 else { 932 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25)); 933 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); 934 } 935 936 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp)); 937 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 938 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); 939 ureg_release_temporary(ureg, rAtt); 940 ureg_release_temporary(ureg, rHit); 941 ureg_release_temporary(ureg, rMid); 942 ureg_release_temporary(ureg, rCtr); 943 ureg_release_temporary(ureg, rD); 944 ureg_release_temporary(ureg, rA); 945 ureg_release_temporary(ureg, rS); 946 ureg_release_temporary(ureg, rAtt); 947 ureg_release_temporary(ureg, tmp); 948 } else 949 /* COLOR */ 950 if (key->darkness) { 951 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 952 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19)); 953 else 954 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); 955 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 956 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f)); 957 } else { 958 ureg_MOV(ureg, oCol[0], vs->aCol[0]); 959 ureg_MOV(ureg, oCol[1], vs->aCol[1]); 960 } 961 962 /* === Process fog. 963 * 964 * exp(x) = ex2(log2(e) * x) 965 */ 966 if (key->fog_mode) { 967 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 968 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 969 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 970 if (key->fog_range) { 971 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 972 ureg_RSQ(ureg, tmp_z, _X(tmp)); 973 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); 974 } else { 975 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); 976 } 977 978 if (key->fog_mode == D3DFOG_EXP) { 979 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 980 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 981 ureg_EX2(ureg, tmp_x, _X(tmp)); 982 } else 983 if (key->fog_mode == D3DFOG_EXP2) { 984 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 985 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp)); 986 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 987 ureg_EX2(ureg, tmp_x, _X(tmp)); 988 } else 989 if (key->fog_mode == D3DFOG_LINEAR) { 990 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); 991 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); 992 } 993 ureg_MOV(ureg, oFog, _X(tmp)); 994 ureg_release_temporary(ureg, tmp); 995 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { 996 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); 997 } 998 999 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { 1000 struct ureg_src input; 1001 struct ureg_dst output; 1002 input = vs->aWgt; 1003 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18); 1004 ureg_MOV(ureg, output, input); 1005 } 1006 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { 1007 struct ureg_src input; 1008 struct ureg_dst output; 1009 input = vs->aInd; 1010 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); 1011 ureg_MOV(ureg, output, input); 1012 } 1013 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { 1014 struct ureg_src input; 1015 struct ureg_dst output; 1016 input = vs->aNrm; 1017 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); 1018 ureg_MOV(ureg, output, input); 1019 } 1020 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { 1021 struct ureg_src input; 1022 struct ureg_dst output; 1023 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); 1024 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); 1025 ureg_MOV(ureg, output, input); 1026 } 1027 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { 1028 struct ureg_src input; 1029 struct ureg_dst output; 1030 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); 1031 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); 1032 ureg_MOV(ureg, output, input); 1033 } 1034 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 1035 struct ureg_src input; 1036 struct ureg_dst output; 1037 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); 1038 input = ureg_scalar(input, TGSI_SWIZZLE_X); 1039 output = oFog; 1040 ureg_MOV(ureg, output, input); 1041 } 1042 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { 1043 (void) 0; /* TODO: replace z of position output ? */ 1044 } 1045 1046 /* ucp for ff applies on world coordinates. 1047 * aVtx is in worldview coordinates. */ 1048 if (key->ucp) { 1049 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0); 1050 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1051 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12)); 1052 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp)); 1053 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp)); 1054 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp)); 1055 ureg_release_temporary(ureg, tmp); 1056 } 1057 1058 if (key->position_t && device->driver_caps.window_space_position_support) 1059 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 1060 1061 ureg_END(ureg); 1062 nine_ureg_tgsi_dump(ureg, FALSE); 1063 return ureg_create_shader_and_destroy(ureg, device->context.pipe); 1064 } 1065 1066 /* PS FF constants layout: 1067 * 1068 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT 1069 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00 1070 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01 1071 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10 1072 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11 1073 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE 1074 * CONST[17..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET 1075 * 1076 * CONST[20] D3DRS_TEXTUREFACTOR 1077 * CONST[21] D3DRS_FOGCOLOR 1078 * CONST[22].x___ RS.FogEnd 1079 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 1080 * CONST[22].__z_ RS.FogDensity 1081 */ 1082 struct ps_build_ctx 1083 { 1084 struct ureg_program *ureg; 1085 1086 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */ 1087 struct ureg_src vT[8]; /* TEXCOORD[i] */ 1088 struct ureg_dst rCur; /* D3DTA_CURRENT */ 1089 struct ureg_dst rMod; 1090 struct ureg_src rCurSrc; 1091 struct ureg_dst rTmp; /* D3DTA_TEMP */ 1092 struct ureg_src rTmpSrc; 1093 struct ureg_dst rTex; 1094 struct ureg_src rTexSrc; 1095 struct ureg_src cBEM[8]; 1096 struct ureg_src s[8]; 1097 1098 struct { 1099 unsigned index; 1100 unsigned index_pre_mod; 1101 } stage; 1102 }; 1103 1104 static struct ureg_src 1105 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) 1106 { 1107 struct ureg_src reg; 1108 1109 switch (ta & D3DTA_SELECTMASK) { 1110 case D3DTA_CONSTANT: 1111 reg = ureg_DECL_constant(ps->ureg, ps->stage.index); 1112 break; 1113 case D3DTA_CURRENT: 1114 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; 1115 break; 1116 case D3DTA_DIFFUSE: 1117 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1118 break; 1119 case D3DTA_SPECULAR: 1120 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1121 break; 1122 case D3DTA_TEMP: 1123 reg = ps->rTmpSrc; 1124 break; 1125 case D3DTA_TEXTURE: 1126 reg = ps->rTexSrc; 1127 break; 1128 case D3DTA_TFACTOR: 1129 reg = ureg_DECL_constant(ps->ureg, 20); 1130 break; 1131 default: 1132 assert(0); 1133 reg = ureg_src_undef(); 1134 break; 1135 } 1136 if (ta & D3DTA_COMPLEMENT) { 1137 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); 1138 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); 1139 reg = ureg_src(dst); 1140 } 1141 if (ta & D3DTA_ALPHAREPLICATE) 1142 reg = _WWWW(reg); 1143 return reg; 1144 } 1145 1146 static struct ureg_dst 1147 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta) 1148 { 1149 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE))); 1150 1151 switch (ta & D3DTA_SELECTMASK) { 1152 case D3DTA_CURRENT: 1153 return ps->rCur; 1154 case D3DTA_TEMP: 1155 return ps->rTmp; 1156 default: 1157 assert(0); 1158 return ureg_dst_undef(); 1159 } 1160 } 1161 1162 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top) 1163 { 1164 switch (top) { 1165 case D3DTOP_DISABLE: 1166 return 0x0; 1167 case D3DTOP_SELECTARG1: 1168 case D3DTOP_PREMODULATE: 1169 return 0x2; 1170 case D3DTOP_SELECTARG2: 1171 return 0x4; 1172 case D3DTOP_MULTIPLYADD: 1173 case D3DTOP_LERP: 1174 return 0x7; 1175 default: 1176 return 0x6; 1177 } 1178 } 1179 1180 static inline boolean 1181 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src) 1182 { 1183 return !dst.WriteMask || 1184 (dst.File == src.File && 1185 dst.Index == src.Index && 1186 !dst.Indirect && 1187 !dst.Saturate && 1188 !src.Indirect && 1189 !src.Negate && 1190 !src.Absolute && 1191 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) && 1192 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) && 1193 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) && 1194 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W))); 1195 1196 } 1197 1198 static void 1199 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg) 1200 { 1201 struct ureg_program *ureg = ps->ureg; 1202 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1203 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 1204 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 1205 1206 tmp.WriteMask = dst.WriteMask; 1207 1208 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 && 1209 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE && 1210 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA && 1211 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA && 1212 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE && 1213 top != D3DTOP_LERP) 1214 dst = ureg_saturate(dst); 1215 1216 switch (top) { 1217 case D3DTOP_SELECTARG1: 1218 if (!is_MOV_no_op(dst, arg[1])) 1219 ureg_MOV(ureg, dst, arg[1]); 1220 break; 1221 case D3DTOP_SELECTARG2: 1222 if (!is_MOV_no_op(dst, arg[2])) 1223 ureg_MOV(ureg, dst, arg[2]); 1224 break; 1225 case D3DTOP_MODULATE: 1226 ureg_MUL(ureg, dst, arg[1], arg[2]); 1227 break; 1228 case D3DTOP_MODULATE2X: 1229 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1230 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp)); 1231 break; 1232 case D3DTOP_MODULATE4X: 1233 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1234 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f)); 1235 break; 1236 case D3DTOP_ADD: 1237 ureg_ADD(ureg, dst, arg[1], arg[2]); 1238 break; 1239 case D3DTOP_ADDSIGNED: 1240 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1241 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); 1242 break; 1243 case D3DTOP_ADDSIGNED2X: 1244 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1245 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1246 break; 1247 case D3DTOP_SUBTRACT: 1248 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); 1249 break; 1250 case D3DTOP_ADDSMOOTH: 1251 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1252 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); 1253 break; 1254 case D3DTOP_BLENDDIFFUSEALPHA: 1255 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]); 1256 break; 1257 case D3DTOP_BLENDTEXTUREALPHA: 1258 /* XXX: alpha taken from previous stage, texture or result ? */ 1259 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]); 1260 break; 1261 case D3DTOP_BLENDFACTORALPHA: 1262 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); 1263 break; 1264 case D3DTOP_BLENDTEXTUREALPHAPM: 1265 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); 1266 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); 1267 break; 1268 case D3DTOP_BLENDCURRENTALPHA: 1269 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]); 1270 break; 1271 case D3DTOP_PREMODULATE: 1272 ureg_MOV(ureg, dst, arg[1]); 1273 ps->stage.index_pre_mod = ps->stage.index + 1; 1274 break; 1275 case D3DTOP_MODULATEALPHA_ADDCOLOR: 1276 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]); 1277 break; 1278 case D3DTOP_MODULATECOLOR_ADDALPHA: 1279 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); 1280 break; 1281 case D3DTOP_MODULATEINVALPHA_ADDCOLOR: 1282 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); 1283 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); 1284 break; 1285 case D3DTOP_MODULATEINVCOLOR_ADDALPHA: 1286 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1287 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1])); 1288 break; 1289 case D3DTOP_BUMPENVMAP: 1290 break; 1291 case D3DTOP_BUMPENVMAPLUMINANCE: 1292 break; 1293 case D3DTOP_DOTPRODUCT3: 1294 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1295 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1296 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); 1297 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); 1298 break; 1299 case D3DTOP_MULTIPLYADD: 1300 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]); 1301 break; 1302 case D3DTOP_LERP: 1303 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]); 1304 break; 1305 case D3DTOP_DISABLE: 1306 /* no-op ? */ 1307 break; 1308 default: 1309 assert(!"invalid D3DTOP"); 1310 break; 1311 } 1312 ureg_release_temporary(ureg, tmp); 1313 ureg_release_temporary(ureg, tmp2); 1314 } 1315 1316 static void * 1317 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) 1318 { 1319 struct ps_build_ctx ps; 1320 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT); 1321 struct ureg_dst oCol; 1322 unsigned s; 1323 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 1324 1325 memset(&ps, 0, sizeof(ps)); 1326 ps.ureg = ureg; 1327 ps.stage.index_pre_mod = -1; 1328 1329 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1330 1331 ps.rCur = ureg_DECL_temporary(ureg); 1332 ps.rTmp = ureg_DECL_temporary(ureg); 1333 ps.rTex = ureg_DECL_temporary(ureg); 1334 ps.rCurSrc = ureg_src(ps.rCur); 1335 ps.rTmpSrc = ureg_src(ps.rTmp); 1336 ps.rTexSrc = ureg_src(ps.rTex); 1337 1338 /* Initial values */ 1339 ureg_MOV(ureg, ps.rCur, ps.vC[0]); 1340 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f)); 1341 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f)); 1342 1343 for (s = 0; s < 8; ++s) { 1344 ps.s[s] = ureg_src_undef(); 1345 1346 if (key->ts[s].colorop != D3DTOP_DISABLE) { 1347 if (key->ts[s].colorarg0 == D3DTA_SPECULAR || 1348 key->ts[s].colorarg1 == D3DTA_SPECULAR || 1349 key->ts[s].colorarg2 == D3DTA_SPECULAR) 1350 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1351 1352 if (key->ts[s].colorarg0 == D3DTA_TEXTURE || 1353 key->ts[s].colorarg1 == D3DTA_TEXTURE || 1354 key->ts[s].colorarg2 == D3DTA_TEXTURE) { 1355 ps.s[s] = ureg_DECL_sampler(ureg, s); 1356 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1357 } 1358 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE || 1359 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE)) 1360 ps.s[s] = ureg_DECL_sampler(ureg, s); 1361 } 1362 1363 if (key->ts[s].alphaop != D3DTOP_DISABLE) { 1364 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || 1365 key->ts[s].alphaarg1 == D3DTA_SPECULAR || 1366 key->ts[s].alphaarg2 == D3DTA_SPECULAR) 1367 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1368 1369 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || 1370 key->ts[s].alphaarg1 == D3DTA_TEXTURE || 1371 key->ts[s].alphaarg2 == D3DTA_TEXTURE) { 1372 ps.s[s] = ureg_DECL_sampler(ureg, s); 1373 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1374 } 1375 } 1376 } 1377 if (key->specular) 1378 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1379 1380 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 1381 1382 /* Run stages. 1383 */ 1384 for (s = 0; s < 8; ++s) { 1385 unsigned colorarg[3]; 1386 unsigned alphaarg[3]; 1387 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop); 1388 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop); 1389 struct ureg_dst dst; 1390 struct ureg_src arg[3]; 1391 1392 if (key->ts[s].colorop == D3DTOP_DISABLE) { 1393 assert (key->ts[s].alphaop == D3DTOP_DISABLE); 1394 continue; 1395 } 1396 ps.stage.index = s; 1397 1398 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s, 1399 nine_D3DTOP_to_str(key->ts[s].colorop), 1400 nine_D3DTOP_to_str(key->ts[s].alphaop)); 1401 1402 if (!ureg_src_is_undef(ps.s[s])) { 1403 unsigned target; 1404 struct ureg_src texture_coord = ps.vT[s]; 1405 struct ureg_dst delta; 1406 switch (key->ts[s].textarget) { 1407 case 0: target = TGSI_TEXTURE_1D; break; 1408 case 1: target = TGSI_TEXTURE_2D; break; 1409 case 2: target = TGSI_TEXTURE_3D; break; 1410 case 3: target = TGSI_TEXTURE_CUBE; break; 1411 /* this is a 2 bit bitfield, do I really need a default case ? */ 1412 } 1413 1414 /* Modify coordinates */ 1415 if (s >= 1 && 1416 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP || 1417 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) { 1418 delta = ureg_DECL_temporary(ureg); 1419 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */ 1420 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1))); 1421 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta)); 1422 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */ 1423 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1))); 1424 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta)); 1425 texture_coord = ureg_src(ureg_DECL_temporary(ureg)); 1426 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]); 1427 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta)); 1428 /* Prepare luminance multiplier 1429 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */ 1430 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { 1431 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2)); 1432 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2)); 1433 1434 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset); 1435 } 1436 } 1437 if (key->projected & (3 << (s *2))) { 1438 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); 1439 if (dim == 4) 1440 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1441 else { 1442 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1443 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1)); 1444 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord); 1445 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); 1446 ureg_release_temporary(ureg, tmp); 1447 } 1448 } else { 1449 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1450 } 1451 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1452 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta)); 1453 } 1454 1455 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || 1456 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1457 continue; 1458 1459 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT); 1460 1461 if (ps.stage.index_pre_mod == ps.stage.index) { 1462 ps.rMod = ureg_DECL_temporary(ureg); 1463 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); 1464 } 1465 1466 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; 1467 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; 1468 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; 1469 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; 1470 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; 1471 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; 1472 1473 if (key->ts[s].colorop != key->ts[s].alphaop || 1474 colorarg[0] != alphaarg[0] || 1475 colorarg[1] != alphaarg[1] || 1476 colorarg[2] != alphaarg[2]) 1477 dst.WriteMask = TGSI_WRITEMASK_XYZ; 1478 1479 /* Special DOTPRODUCT behaviour (see wine tests) */ 1480 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) 1481 dst.WriteMask = TGSI_WRITEMASK_XYZW; 1482 1483 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); 1484 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); 1485 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); 1486 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg); 1487 1488 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) { 1489 dst.WriteMask = TGSI_WRITEMASK_W; 1490 1491 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]); 1492 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]); 1493 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]); 1494 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg); 1495 } 1496 } 1497 1498 if (key->specular) 1499 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]); 1500 1501 /* Fog. 1502 */ 1503 if (key->fog_mode) { 1504 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X); 1505 struct ureg_src vPos; 1506 if (device->screen->get_param(device->screen, 1507 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { 1508 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 1509 } else { 1510 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 1511 TGSI_INTERPOLATE_LINEAR); 1512 } 1513 1514 /* Source is either W or Z. 1515 * When we use vs ff, 1516 * Z is when an orthogonal projection matrix is detected, 1517 * W (WFOG) else. 1518 * Z is used for programmable vs. 1519 * Note: Tests indicate that the projection matrix coefficients do 1520 * actually affect pixel fog (and not vertex fog) when vs ff is used, 1521 * which justifies taking the position's w instead of taking the z coordinate 1522 * before the projection in the vs shader. 1523 */ 1524 if (!key->fog_source) 1525 ureg_MOV(ureg, rFog, _ZZZZ(vPos)); 1526 else 1527 /* Position's w is 1/w */ 1528 ureg_RCP(ureg, rFog, _WWWW(vPos)); 1529 1530 if (key->fog_mode == D3DFOG_EXP) { 1531 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1532 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1533 ureg_EX2(ureg, rFog, _X(rFog)); 1534 } else 1535 if (key->fog_mode == D3DFOG_EXP2) { 1536 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1537 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog)); 1538 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1539 ureg_EX2(ureg, rFog, _X(rFog)); 1540 } else 1541 if (key->fog_mode == D3DFOG_LINEAR) { 1542 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); 1543 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); 1544 } 1545 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); 1546 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1547 } else 1548 if (key->fog) { 1549 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE); 1550 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); 1551 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1552 } else { 1553 ureg_MOV(ureg, oCol, ps.rCurSrc); 1554 } 1555 1556 ureg_END(ureg); 1557 nine_ureg_tgsi_dump(ureg, FALSE); 1558 return ureg_create_shader_and_destroy(ureg, device->context.pipe); 1559 } 1560 1561 static struct NineVertexShader9 * 1562 nine_ff_get_vs(struct NineDevice9 *device) 1563 { 1564 const struct nine_context *context = &device->context; 1565 struct NineVertexShader9 *vs; 1566 enum pipe_error err; 1567 struct vs_build_ctx bld; 1568 struct nine_ff_vs_key key; 1569 unsigned s, i; 1570 boolean has_indexes = false; 1571 boolean has_weights = false; 1572 char input_texture_coord[8]; 1573 1574 assert(sizeof(key) <= sizeof(key.value32)); 1575 1576 memset(&key, 0, sizeof(key)); 1577 memset(&bld, 0, sizeof(bld)); 1578 memset(&input_texture_coord, 0, sizeof(input_texture_coord)); 1579 1580 bld.key = &key; 1581 1582 /* FIXME: this shouldn't be NULL, but it is on init */ 1583 if (context->vdecl) { 1584 key.color0in_one = 1; 1585 key.color1in_zero = 1; 1586 for (i = 0; i < context->vdecl->nelems; i++) { 1587 uint16_t usage = context->vdecl->usage_map[i]; 1588 if (usage == NINE_DECLUSAGE_POSITIONT) 1589 key.position_t = 1; 1590 else if (usage == NINE_DECLUSAGE_i(COLOR, 0)) 1591 key.color0in_one = 0; 1592 else if (usage == NINE_DECLUSAGE_i(COLOR, 1)) 1593 key.color1in_zero = 0; 1594 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) { 1595 has_indexes = true; 1596 key.passthrough |= 1 << usage; 1597 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) { 1598 has_weights = true; 1599 key.passthrough |= 1 << usage; 1600 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) { 1601 key.has_normal = 1; 1602 key.passthrough |= 1 << usage; 1603 } else if (usage == NINE_DECLUSAGE_PSIZE) 1604 key.vertexpointsize = 1; 1605 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { 1606 s = usage / NINE_DECLUSAGE_COUNT; 1607 if (s < 8) 1608 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type); 1609 else 1610 DBG("FF given texture coordinate >= 8. Ignoring\n"); 1611 } else if (usage < NINE_DECLUSAGE_NONE) 1612 key.passthrough |= 1 << usage; 1613 } 1614 } 1615 /* ff vs + ps 3.0: some elements are passed to the ps (wine test). 1616 * We do restrict to indices 0 */ 1617 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | 1618 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | 1619 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); 1620 if (!key.position_t) 1621 key.passthrough = 0; 1622 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE]; 1623 1624 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active; 1625 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active; 1626 if (key.position_t) { 1627 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */ 1628 key.lighting = 0; 1629 } 1630 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) { 1631 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2); 1632 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask; 1633 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask; 1634 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask; 1635 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask; 1636 } 1637 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1638 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0; 1639 if (key.fog_mode) 1640 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE]; 1641 1642 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER]; 1643 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS]; 1644 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE]; 1645 1646 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1647 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes; 1648 1649 switch (context->rs[D3DRS_VERTEXBLEND]) { 1650 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break; 1651 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break; 1652 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break; 1653 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break; 1654 case D3DVBF_TWEENING: key.vertextween = 1; break; 1655 default: 1656 assert(!"invalid D3DVBF"); 1657 break; 1658 } 1659 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) 1660 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */ 1661 } 1662 1663 for (s = 0; s < 8; ++s) { 1664 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; 1665 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7; 1666 unsigned dim; 1667 1668 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) 1669 gen = NINED3DTSS_TCI_PASSTHRU; 1670 1671 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU) 1672 gen = NINED3DTSS_TCI_DISABLE; 1673 1674 key.tc_gen |= gen << (s * 3); 1675 key.tc_idx |= idx << (s * 3); 1676 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2); 1677 1678 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; 1679 if (dim > 4) 1680 dim = input_texture_coord[idx]; 1681 if (dim == 1) /* NV behaviour */ 1682 dim = 0; 1683 key.tc_dim_output |= dim << (s * 3); 1684 } 1685 1686 vs = util_hash_table_get(device->ff.ht_vs, &key); 1687 if (vs) 1688 return vs; 1689 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld)); 1690 1691 nine_ff_prune_vs(device); 1692 if (vs) { 1693 unsigned n; 1694 1695 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); 1696 1697 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs); 1698 (void)err; 1699 assert(err == PIPE_OK); 1700 device->ff.num_vs++; 1701 NineUnknown_ConvertRefToBind(NineUnknown(vs)); 1702 1703 vs->num_inputs = bld.num_inputs; 1704 for (n = 0; n < bld.num_inputs; ++n) 1705 vs->input_map[n].ndecl = bld.input[n]; 1706 1707 vs->position_t = key.position_t; 1708 vs->point_size = key.vertexpointsize | key.pointscale; 1709 } 1710 return vs; 1711 } 1712 1713 #define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE) 1714 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32))) 1715 1716 static struct NinePixelShader9 * 1717 nine_ff_get_ps(struct NineDevice9 *device) 1718 { 1719 struct nine_context *context = &device->context; 1720 D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION); 1721 struct NinePixelShader9 *ps; 1722 enum pipe_error err; 1723 struct nine_ff_ps_key key; 1724 unsigned s; 1725 uint8_t sampler_mask = 0; 1726 1727 assert(sizeof(key) <= sizeof(key.value32)); 1728 1729 memset(&key, 0, sizeof(key)); 1730 for (s = 0; s < 8; ++s) { 1731 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP]; 1732 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP]; 1733 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop); 1734 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop); 1735 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. 1736 * ALPHAOP cannot be enabled if COLOROP is disabled. 1737 * Verified on Windows. */ 1738 if (key.ts[s].colorop == D3DTOP_DISABLE) { 1739 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */ 1740 break; 1741 } 1742 1743 if (!context->texture[s].enabled && 1744 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && 1745 used_c & 0x1) || 1746 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && 1747 used_c & 0x2) || 1748 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE && 1749 used_c & 0x4))) { 1750 /* Tested on Windows: Invalid texture read disables the stage 1751 * and the subsequent ones, but only for colorop. For alpha, 1752 * it's as if the texture had alpha of 1.0, which is what 1753 * has our dummy texture in that case. Invalid color also 1754 * disabled the following alpha stages. */ 1755 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1756 break; 1757 } 1758 1759 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE || 1760 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE || 1761 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE || 1762 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE || 1763 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE || 1764 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE) 1765 sampler_mask |= (1 << s); 1766 1767 if (key.ts[s].colorop != D3DTOP_DISABLE) { 1768 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0]; 1769 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1]; 1770 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2]; 1771 if (used_c & 0x1) key.colorarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s; 1772 if (used_c & 0x1) key.colorarg_b5[0] |= (context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s; 1773 if (used_c & 0x2) key.colorarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s; 1774 if (used_c & 0x2) key.colorarg_b5[1] |= (context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s; 1775 if (used_c & 0x4) key.colorarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s; 1776 if (used_c & 0x4) key.colorarg_b5[2] |= (context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s; 1777 } 1778 if (key.ts[s].alphaop != D3DTOP_DISABLE) { 1779 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0]; 1780 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1]; 1781 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2]; 1782 if (used_a & 0x1) key.alphaarg_b4[0] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s; 1783 if (used_a & 0x2) key.alphaarg_b4[1] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s; 1784 if (used_a & 0x4) key.alphaarg_b4[2] |= (context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s; 1785 } 1786 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; 1787 1788 if (context->texture[s].enabled) { 1789 switch (context->texture[s].type) { 1790 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; 1791 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; 1792 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; 1793 default: 1794 assert(!"unexpected texture type"); 1795 break; 1796 } 1797 } else { 1798 key.ts[s].textarget = 1; 1799 } 1800 } 1801 1802 /* Note: If colorop is D3DTOP_DISABLE for the first stage 1803 * (which implies alphaop is too), nothing particular happens, 1804 * that is, current is equal to diffuse (which is the case anyway, 1805 * because it is how it is initialized). 1806 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop, 1807 * because then if the resultarg is TEMP, then diffuse alpha is written 1808 * to it. */ 1809 if (key.ts[0].colorop != D3DTOP_DISABLE && 1810 key.ts[0].alphaop == D3DTOP_DISABLE && 1811 key.ts[0].resultarg != 0) { 1812 key.ts[0].alphaop = D3DTOP_SELECTARG1; 1813 key.ts[0].alphaarg1 = D3DTA_DIFFUSE; 1814 } 1815 /* When no alpha stage writes to current, diffuse alpha is taken. 1816 * Since we initialize current to diffuse, we have the behaviour. */ 1817 1818 /* Last stage always writes to Current */ 1819 if (s >= 1) 1820 key.ts[s-1].resultarg = 0; 1821 1822 key.projected = nine_ff_get_projected_key(context); 1823 key.specular = !!context->rs[D3DRS_SPECULARENABLE]; 1824 1825 for (; s < 8; ++s) 1826 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1827 if (context->rs[D3DRS_FOGENABLE]) 1828 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE]; 1829 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1830 /* Pixel fog (with WFOG advertised): source is either Z or W. 1831 * W is the source if vs ff is used, and the 1832 * projection matrix is not orthogonal. 1833 * Tests on Win 10 seem to indicate _34 1834 * and _33 are checked against 0, 1. */ 1835 if (key.fog_mode && key.fog) 1836 key.fog_source = !context->programmable_vs && 1837 !(projection_matrix->_34 == 0.0f && 1838 projection_matrix->_44 == 1.0f); 1839 1840 ps = util_hash_table_get(device->ff.ht_ps, &key); 1841 if (ps) 1842 return ps; 1843 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key)); 1844 1845 nine_ff_prune_ps(device); 1846 if (ps) { 1847 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); 1848 1849 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps); 1850 (void)err; 1851 assert(err == PIPE_OK); 1852 device->ff.num_ps++; 1853 NineUnknown_ConvertRefToBind(NineUnknown(ps)); 1854 1855 ps->rt_mask = 0x1; 1856 ps->sampler_mask = sampler_mask; 1857 } 1858 return ps; 1859 } 1860 1861 static void 1862 nine_ff_load_vs_transforms(struct NineDevice9 *device) 1863 { 1864 struct nine_context *context = &device->context; 1865 D3DMATRIX T; 1866 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1867 unsigned i; 1868 1869 /* TODO: make this nicer, and only upload the ones we need */ 1870 /* TODO: use ff.vs_const as storage of W, V, P matrices */ 1871 1872 if (IS_D3DTS_DIRTY(context, WORLD) || 1873 IS_D3DTS_DIRTY(context, VIEW) || 1874 IS_D3DTS_DIRTY(context, PROJECTION)) { 1875 /* WVP, WV matrices */ 1876 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW)); 1877 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION)); 1878 1879 /* normal matrix == transpose(inverse(WV)) */ 1880 nine_d3d_matrix_inverse(&T, &M[1]); 1881 nine_d3d_matrix_transpose(&M[4], &T); 1882 1883 /* P matrix */ 1884 M[2] = *GET_D3DTS(PROJECTION); 1885 1886 /* V and W matrix */ 1887 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW)); 1888 M[40] = M[1]; 1889 } 1890 1891 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1892 /* load other world matrices */ 1893 for (i = 1; i <= 8; ++i) { 1894 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW)); 1895 } 1896 } 1897 1898 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]); 1899 } 1900 1901 static void 1902 nine_ff_load_lights(struct NineDevice9 *device) 1903 { 1904 struct nine_context *context = &device->context; 1905 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1906 unsigned l; 1907 1908 if (context->changed.group & NINE_STATE_FF_MATERIAL) { 1909 const D3DMATERIAL9 *mtl = &context->ff.material; 1910 1911 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float)); 1912 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float)); 1913 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float)); 1914 dst[23].x = mtl->Power; 1915 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float)); 1916 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]); 1917 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r; 1918 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g; 1919 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b; 1920 } 1921 1922 if (!(context->changed.group & NINE_STATE_FF_LIGHTING)) 1923 return; 1924 1925 for (l = 0; l < context->ff.num_lights_active; ++l) { 1926 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]]; 1927 1928 dst[32 + l * 8].x = light->Type; 1929 dst[32 + l * 8].y = light->Attenuation0; 1930 dst[32 + l * 8].z = light->Attenuation1; 1931 dst[32 + l * 8].w = light->Attenuation2; 1932 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse)); 1933 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular)); 1934 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient)); 1935 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW)); 1936 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW)); 1937 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range; 1938 dst[37 + l * 8].w = light->Falloff; 1939 dst[38 + l * 8].x = cosf(light->Theta * 0.5f); 1940 dst[38 + l * 8].y = cosf(light->Phi * 0.5f); 1941 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); 1942 dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active); 1943 } 1944 } 1945 1946 static void 1947 nine_ff_load_point_and_fog_params(struct NineDevice9 *device) 1948 { 1949 struct nine_context *context = &device->context; 1950 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1951 1952 if (!(context->changed.group & NINE_STATE_FF_OTHER)) 1953 return; 1954 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); 1955 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); 1956 dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]); 1957 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]); 1958 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]); 1959 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]); 1960 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]); 1961 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 1962 if (isinf(dst[28].y)) 1963 dst[28].y = 0.0f; 1964 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 1965 } 1966 1967 static void 1968 nine_ff_load_tex_matrices(struct NineDevice9 *device) 1969 { 1970 struct nine_context *context = &device->context; 1971 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1972 unsigned s; 1973 1974 if (!(context->ff.changed.transform[0] & 0xff0000)) 1975 return; 1976 for (s = 0; s < 8; ++s) { 1977 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s)) 1978 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE)); 1979 } 1980 } 1981 1982 static void 1983 nine_ff_load_ps_params(struct NineDevice9 *device) 1984 { 1985 struct nine_context *context = &device->context; 1986 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; 1987 unsigned s; 1988 1989 if (!(context->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER))) 1990 return; 1991 1992 for (s = 0; s < 8; ++s) 1993 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]); 1994 1995 for (s = 0; s < 8; ++s) { 1996 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]); 1997 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]); 1998 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]); 1999 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]); 2000 if (s & 1) { 2001 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2002 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2003 } else { 2004 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2005 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2006 } 2007 } 2008 2009 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]); 2010 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]); 2011 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]); 2012 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 2013 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 2014 } 2015 2016 static void 2017 nine_ff_load_viewport_info(struct NineDevice9 *device) 2018 { 2019 D3DVIEWPORT9 *viewport = &device->context.viewport; 2020 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 2021 float diffZ = viewport->MaxZ - viewport->MinZ; 2022 2023 /* Note: the other functions avoids to fill the const again if nothing changed. 2024 * But we don't have much to fill, and adding code to allow that may be complex 2025 * so just fill it always */ 2026 dst[100].x = 2.0f / (float)(viewport->Width); 2027 dst[100].y = 2.0f / (float)(viewport->Height); 2028 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ); 2029 dst[100].w = (float)(viewport->Width); 2030 dst[101].x = (float)(viewport->X); 2031 dst[101].y = (float)(viewport->Y); 2032 dst[101].z = (float)(viewport->MinZ); 2033 } 2034 2035 void 2036 nine_ff_update(struct NineDevice9 *device) 2037 { 2038 struct nine_context *context = &device->context; 2039 struct pipe_constant_buffer cb; 2040 2041 DBG("vs=%p ps=%p\n", context->vs, context->ps); 2042 2043 /* NOTE: the only reference belongs to the hash table */ 2044 if (!context->programmable_vs) { 2045 device->ff.vs = nine_ff_get_vs(device); 2046 context->changed.group |= NINE_STATE_VS; 2047 } 2048 if (!context->ps) { 2049 device->ff.ps = nine_ff_get_ps(device); 2050 context->changed.group |= NINE_STATE_PS; 2051 } 2052 2053 if (!context->programmable_vs) { 2054 nine_ff_load_vs_transforms(device); 2055 nine_ff_load_tex_matrices(device); 2056 nine_ff_load_lights(device); 2057 nine_ff_load_point_and_fog_params(device); 2058 nine_ff_load_viewport_info(device); 2059 2060 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform)); 2061 2062 cb.buffer_offset = 0; 2063 cb.buffer = NULL; 2064 cb.user_buffer = device->ff.vs_const; 2065 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); 2066 2067 context->pipe_data.cb_vs_ff = cb; 2068 context->commit |= NINE_STATE_COMMIT_CONST_VS; 2069 } 2070 2071 if (!context->ps) { 2072 nine_ff_load_ps_params(device); 2073 2074 cb.buffer_offset = 0; 2075 cb.buffer = NULL; 2076 cb.user_buffer = device->ff.ps_const; 2077 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); 2078 2079 context->pipe_data.cb_ps_ff = cb; 2080 context->commit |= NINE_STATE_COMMIT_CONST_PS; 2081 } 2082 2083 context->changed.group &= ~NINE_STATE_FF; 2084 } 2085 2086 2087 boolean 2088 nine_ff_init(struct NineDevice9 *device) 2089 { 2090 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash, 2091 nine_ff_vs_key_comp); 2092 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash, 2093 nine_ff_ps_key_comp); 2094 2095 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash, 2096 nine_ff_fvf_key_comp); 2097 2098 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float)); 2099 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float)); 2100 2101 return device->ff.ht_vs && device->ff.ht_ps && 2102 device->ff.ht_fvf && 2103 device->ff.vs_const && device->ff.ps_const; 2104 } 2105 2106 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data) 2107 { 2108 NineUnknown_Unbind(NineUnknown(value)); 2109 return PIPE_OK; 2110 } 2111 2112 void 2113 nine_ff_fini(struct NineDevice9 *device) 2114 { 2115 if (device->ff.ht_vs) { 2116 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2117 util_hash_table_destroy(device->ff.ht_vs); 2118 } 2119 if (device->ff.ht_ps) { 2120 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2121 util_hash_table_destroy(device->ff.ht_ps); 2122 } 2123 if (device->ff.ht_fvf) { 2124 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL); 2125 util_hash_table_destroy(device->ff.ht_fvf); 2126 } 2127 device->ff.vs = NULL; /* destroyed by unbinding from hash table */ 2128 device->ff.ps = NULL; 2129 2130 FREE(device->ff.vs_const); 2131 FREE(device->ff.ps_const); 2132 } 2133 2134 static void 2135 nine_ff_prune_vs(struct NineDevice9 *device) 2136 { 2137 struct nine_context *context = &device->context; 2138 2139 if (device->ff.num_vs > 100) { 2140 /* could destroy the bound one here, so unbind */ 2141 context->pipe->bind_vs_state(context->pipe, NULL); 2142 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2143 util_hash_table_clear(device->ff.ht_vs); 2144 device->ff.num_vs = 0; 2145 context->changed.group |= NINE_STATE_VS; 2146 } 2147 } 2148 static void 2149 nine_ff_prune_ps(struct NineDevice9 *device) 2150 { 2151 struct nine_context *context = &device->context; 2152 2153 if (device->ff.num_ps > 100) { 2154 /* could destroy the bound one here, so unbind */ 2155 context->pipe->bind_fs_state(context->pipe, NULL); 2156 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2157 util_hash_table_clear(device->ff.ht_ps); 2158 device->ff.num_ps = 0; 2159 context->changed.group |= NINE_STATE_PS; 2160 } 2161 } 2162 2163 /* ========================================================================== */ 2164 2165 /* Matrix multiplication: 2166 * 2167 * in memory: 0 1 2 3 (row major) 2168 * 4 5 6 7 2169 * 8 9 a b 2170 * c d e f 2171 * 2172 * cA cB cC cD 2173 * r0 = (r0 * cA) (r0 * cB) . . 2174 * r1 = (r1 * cA) (r1 * cB) 2175 * r2 = (r2 * cA) . 2176 * r3 = (r3 * cA) . 2177 * 2178 * r: (11) (12) (13) (14) 2179 * (21) (22) (23) (24) 2180 * (31) (32) (33) (34) 2181 * (41) (42) (43) (44) 2182 * l: (11 12 13 14) 2183 * (21 22 23 24) 2184 * (31 32 33 34) 2185 * (41 42 43 44) 2186 * 2187 * v: (x y z 1 ) 2188 * 2189 * t.xyzw = MUL(v.xxxx, r[0]); 2190 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw); 2191 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw); 2192 * v.xyzw = MAD(v.wwww, r[3], t.xyzw); 2193 * 2194 * v.x = DP4(v, c[0]); 2195 * v.y = DP4(v, c[1]); 2196 * v.z = DP4(v, c[2]); 2197 * v.w = DP4(v, c[3]) = 1 2198 */ 2199 2200 /* 2201 static void 2202 nine_D3DMATRIX_print(const D3DMATRIX *M) 2203 { 2204 DBG("\n(%f %f %f %f)\n" 2205 "(%f %f %f %f)\n" 2206 "(%f %f %f %f)\n" 2207 "(%f %f %f %f)\n", 2208 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3], 2209 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3], 2210 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3], 2211 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]); 2212 } 2213 */ 2214 2215 static inline float 2216 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c) 2217 { 2218 return A->m[r][0] * B->m[0][c] + 2219 A->m[r][1] * B->m[1][c] + 2220 A->m[r][2] * B->m[2][c] + 2221 A->m[r][3] * B->m[3][c]; 2222 } 2223 2224 static inline float 2225 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2226 { 2227 return v->x * M->m[0][c] + 2228 v->y * M->m[1][c] + 2229 v->z * M->m[2][c] + 2230 1.0f * M->m[3][c]; 2231 } 2232 2233 static inline float 2234 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2235 { 2236 return v->x * M->m[0][c] + 2237 v->y * M->m[1][c] + 2238 v->z * M->m[2][c]; 2239 } 2240 2241 void 2242 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R) 2243 { 2244 D->_11 = nine_DP4_row_col(L, 0, R, 0); 2245 D->_12 = nine_DP4_row_col(L, 0, R, 1); 2246 D->_13 = nine_DP4_row_col(L, 0, R, 2); 2247 D->_14 = nine_DP4_row_col(L, 0, R, 3); 2248 2249 D->_21 = nine_DP4_row_col(L, 1, R, 0); 2250 D->_22 = nine_DP4_row_col(L, 1, R, 1); 2251 D->_23 = nine_DP4_row_col(L, 1, R, 2); 2252 D->_24 = nine_DP4_row_col(L, 1, R, 3); 2253 2254 D->_31 = nine_DP4_row_col(L, 2, R, 0); 2255 D->_32 = nine_DP4_row_col(L, 2, R, 1); 2256 D->_33 = nine_DP4_row_col(L, 2, R, 2); 2257 D->_34 = nine_DP4_row_col(L, 2, R, 3); 2258 2259 D->_41 = nine_DP4_row_col(L, 3, R, 0); 2260 D->_42 = nine_DP4_row_col(L, 3, R, 1); 2261 D->_43 = nine_DP4_row_col(L, 3, R, 2); 2262 D->_44 = nine_DP4_row_col(L, 3, R, 3); 2263 } 2264 2265 void 2266 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2267 { 2268 d->x = nine_DP4_vec_col(v, M, 0); 2269 d->y = nine_DP4_vec_col(v, M, 1); 2270 d->z = nine_DP4_vec_col(v, M, 2); 2271 } 2272 2273 void 2274 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2275 { 2276 d->x = nine_DP3_vec_col(v, M, 0); 2277 d->y = nine_DP3_vec_col(v, M, 1); 2278 d->z = nine_DP3_vec_col(v, M, 2); 2279 } 2280 2281 void 2282 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M) 2283 { 2284 unsigned i, j; 2285 for (i = 0; i < 4; ++i) 2286 for (j = 0; j < 4; ++j) 2287 D->m[i][j] = M->m[j][i]; 2288 } 2289 2290 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2291 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2292 if (t > 0.0f) pos += t; else neg += t; } while(0) 2293 2294 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2295 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2296 if (t > 0.0f) neg -= t; else pos -= t; } while(0) 2297 float 2298 nine_d3d_matrix_det(const D3DMATRIX *M) 2299 { 2300 float pos = 0.0f; 2301 float neg = 0.0f; 2302 2303 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4); 2304 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2); 2305 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3); 2306 2307 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3); 2308 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4); 2309 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1); 2310 2311 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4); 2312 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1); 2313 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2); 2314 2315 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2); 2316 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3); 2317 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1); 2318 2319 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3); 2320 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4); 2321 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2); 2322 2323 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4); 2324 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1); 2325 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3); 2326 2327 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2); 2328 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4); 2329 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1); 2330 2331 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3); 2332 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1); 2333 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2); 2334 2335 return pos + neg; 2336 } 2337 2338 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because 2339 * I have no idea where this code came from. 2340 */ 2341 void 2342 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) 2343 { 2344 int i, k; 2345 float det; 2346 2347 D->m[0][0] = 2348 M->m[1][1] * M->m[2][2] * M->m[3][3] - 2349 M->m[1][1] * M->m[3][2] * M->m[2][3] - 2350 M->m[1][2] * M->m[2][1] * M->m[3][3] + 2351 M->m[1][2] * M->m[3][1] * M->m[2][3] + 2352 M->m[1][3] * M->m[2][1] * M->m[3][2] - 2353 M->m[1][3] * M->m[3][1] * M->m[2][2]; 2354 2355 D->m[0][1] = 2356 -M->m[0][1] * M->m[2][2] * M->m[3][3] + 2357 M->m[0][1] * M->m[3][2] * M->m[2][3] + 2358 M->m[0][2] * M->m[2][1] * M->m[3][3] - 2359 M->m[0][2] * M->m[3][1] * M->m[2][3] - 2360 M->m[0][3] * M->m[2][1] * M->m[3][2] + 2361 M->m[0][3] * M->m[3][1] * M->m[2][2]; 2362 2363 D->m[0][2] = 2364 M->m[0][1] * M->m[1][2] * M->m[3][3] - 2365 M->m[0][1] * M->m[3][2] * M->m[1][3] - 2366 M->m[0][2] * M->m[1][1] * M->m[3][3] + 2367 M->m[0][2] * M->m[3][1] * M->m[1][3] + 2368 M->m[0][3] * M->m[1][1] * M->m[3][2] - 2369 M->m[0][3] * M->m[3][1] * M->m[1][2]; 2370 2371 D->m[0][3] = 2372 -M->m[0][1] * M->m[1][2] * M->m[2][3] + 2373 M->m[0][1] * M->m[2][2] * M->m[1][3] + 2374 M->m[0][2] * M->m[1][1] * M->m[2][3] - 2375 M->m[0][2] * M->m[2][1] * M->m[1][3] - 2376 M->m[0][3] * M->m[1][1] * M->m[2][2] + 2377 M->m[0][3] * M->m[2][1] * M->m[1][2]; 2378 2379 D->m[1][0] = 2380 -M->m[1][0] * M->m[2][2] * M->m[3][3] + 2381 M->m[1][0] * M->m[3][2] * M->m[2][3] + 2382 M->m[1][2] * M->m[2][0] * M->m[3][3] - 2383 M->m[1][2] * M->m[3][0] * M->m[2][3] - 2384 M->m[1][3] * M->m[2][0] * M->m[3][2] + 2385 M->m[1][3] * M->m[3][0] * M->m[2][2]; 2386 2387 D->m[1][1] = 2388 M->m[0][0] * M->m[2][2] * M->m[3][3] - 2389 M->m[0][0] * M->m[3][2] * M->m[2][3] - 2390 M->m[0][2] * M->m[2][0] * M->m[3][3] + 2391 M->m[0][2] * M->m[3][0] * M->m[2][3] + 2392 M->m[0][3] * M->m[2][0] * M->m[3][2] - 2393 M->m[0][3] * M->m[3][0] * M->m[2][2]; 2394 2395 D->m[1][2] = 2396 -M->m[0][0] * M->m[1][2] * M->m[3][3] + 2397 M->m[0][0] * M->m[3][2] * M->m[1][3] + 2398 M->m[0][2] * M->m[1][0] * M->m[3][3] - 2399 M->m[0][2] * M->m[3][0] * M->m[1][3] - 2400 M->m[0][3] * M->m[1][0] * M->m[3][2] + 2401 M->m[0][3] * M->m[3][0] * M->m[1][2]; 2402 2403 D->m[1][3] = 2404 M->m[0][0] * M->m[1][2] * M->m[2][3] - 2405 M->m[0][0] * M->m[2][2] * M->m[1][3] - 2406 M->m[0][2] * M->m[1][0] * M->m[2][3] + 2407 M->m[0][2] * M->m[2][0] * M->m[1][3] + 2408 M->m[0][3] * M->m[1][0] * M->m[2][2] - 2409 M->m[0][3] * M->m[2][0] * M->m[1][2]; 2410 2411 D->m[2][0] = 2412 M->m[1][0] * M->m[2][1] * M->m[3][3] - 2413 M->m[1][0] * M->m[3][1] * M->m[2][3] - 2414 M->m[1][1] * M->m[2][0] * M->m[3][3] + 2415 M->m[1][1] * M->m[3][0] * M->m[2][3] + 2416 M->m[1][3] * M->m[2][0] * M->m[3][1] - 2417 M->m[1][3] * M->m[3][0] * M->m[2][1]; 2418 2419 D->m[2][1] = 2420 -M->m[0][0] * M->m[2][1] * M->m[3][3] + 2421 M->m[0][0] * M->m[3][1] * M->m[2][3] + 2422 M->m[0][1] * M->m[2][0] * M->m[3][3] - 2423 M->m[0][1] * M->m[3][0] * M->m[2][3] - 2424 M->m[0][3] * M->m[2][0] * M->m[3][1] + 2425 M->m[0][3] * M->m[3][0] * M->m[2][1]; 2426 2427 D->m[2][2] = 2428 M->m[0][0] * M->m[1][1] * M->m[3][3] - 2429 M->m[0][0] * M->m[3][1] * M->m[1][3] - 2430 M->m[0][1] * M->m[1][0] * M->m[3][3] + 2431 M->m[0][1] * M->m[3][0] * M->m[1][3] + 2432 M->m[0][3] * M->m[1][0] * M->m[3][1] - 2433 M->m[0][3] * M->m[3][0] * M->m[1][1]; 2434 2435 D->m[2][3] = 2436 -M->m[0][0] * M->m[1][1] * M->m[2][3] + 2437 M->m[0][0] * M->m[2][1] * M->m[1][3] + 2438 M->m[0][1] * M->m[1][0] * M->m[2][3] - 2439 M->m[0][1] * M->m[2][0] * M->m[1][3] - 2440 M->m[0][3] * M->m[1][0] * M->m[2][1] + 2441 M->m[0][3] * M->m[2][0] * M->m[1][1]; 2442 2443 D->m[3][0] = 2444 -M->m[1][0] * M->m[2][1] * M->m[3][2] + 2445 M->m[1][0] * M->m[3][1] * M->m[2][2] + 2446 M->m[1][1] * M->m[2][0] * M->m[3][2] - 2447 M->m[1][1] * M->m[3][0] * M->m[2][2] - 2448 M->m[1][2] * M->m[2][0] * M->m[3][1] + 2449 M->m[1][2] * M->m[3][0] * M->m[2][1]; 2450 2451 D->m[3][1] = 2452 M->m[0][0] * M->m[2][1] * M->m[3][2] - 2453 M->m[0][0] * M->m[3][1] * M->m[2][2] - 2454 M->m[0][1] * M->m[2][0] * M->m[3][2] + 2455 M->m[0][1] * M->m[3][0] * M->m[2][2] + 2456 M->m[0][2] * M->m[2][0] * M->m[3][1] - 2457 M->m[0][2] * M->m[3][0] * M->m[2][1]; 2458 2459 D->m[3][2] = 2460 -M->m[0][0] * M->m[1][1] * M->m[3][2] + 2461 M->m[0][0] * M->m[3][1] * M->m[1][2] + 2462 M->m[0][1] * M->m[1][0] * M->m[3][2] - 2463 M->m[0][1] * M->m[3][0] * M->m[1][2] - 2464 M->m[0][2] * M->m[1][0] * M->m[3][1] + 2465 M->m[0][2] * M->m[3][0] * M->m[1][1]; 2466 2467 D->m[3][3] = 2468 M->m[0][0] * M->m[1][1] * M->m[2][2] - 2469 M->m[0][0] * M->m[2][1] * M->m[1][2] - 2470 M->m[0][1] * M->m[1][0] * M->m[2][2] + 2471 M->m[0][1] * M->m[2][0] * M->m[1][2] + 2472 M->m[0][2] * M->m[1][0] * M->m[2][1] - 2473 M->m[0][2] * M->m[2][0] * M->m[1][1]; 2474 2475 det = 2476 M->m[0][0] * D->m[0][0] + 2477 M->m[1][0] * D->m[0][1] + 2478 M->m[2][0] * D->m[0][2] + 2479 M->m[3][0] * D->m[0][3]; 2480 2481 if (fabsf(det) < 1e-30) {/* non inversible */ 2482 *D = *M; /* wine tests */ 2483 return; 2484 } 2485 2486 det = 1.0 / det; 2487 2488 for (i = 0; i < 4; i++) 2489 for (k = 0; k < 4; k++) 2490 D->m[i][k] *= det; 2491 2492 #ifdef DEBUG 2493 { 2494 D3DMATRIX I; 2495 2496 nine_d3d_matrix_matrix_mul(&I, D, M); 2497 2498 for (i = 0; i < 4; ++i) 2499 for (k = 0; k < 4; ++k) 2500 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3) 2501 DBG("Matrix inversion check FAILED !\n"); 2502 } 2503 #endif 2504 } 2505