1 /********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26 #include "util/u_inlines.h" 27 #include "pipe/p_defines.h" 28 #include "util/u_math.h" 29 #include "util/u_memory.h" 30 #include "util/u_bitmask.h" 31 #include "tgsi/tgsi_ureg.h" 32 33 #include "svga_context.h" 34 #include "svga_state.h" 35 #include "svga_cmd.h" 36 #include "svga_shader.h" 37 #include "svga_resource_texture.h" 38 #include "svga_tgsi.h" 39 #include "svga_format.h" 40 41 #include "svga_hw_reg.h" 42 43 44 45 /** 46 * If we fail to compile a fragment shader (because it uses too many 47 * registers, for example) we'll use a dummy/fallback shader that 48 * simply emits a constant color (red for debug, black for release). 49 * We hit this with the Unigine/Heaven demo when Shaders = High. 50 * With black, the demo still looks good. 51 */ 52 static const struct tgsi_token * 53 get_dummy_fragment_shader(void) 54 { 55 #ifdef DEBUG 56 static const float color[4] = { 1.0, 0.0, 0.0, 0.0 }; /* red */ 57 #else 58 static const float color[4] = { 0.0, 0.0, 0.0, 0.0 }; /* black */ 59 #endif 60 struct ureg_program *ureg; 61 const struct tgsi_token *tokens; 62 struct ureg_src src; 63 struct ureg_dst dst; 64 unsigned num_tokens; 65 66 ureg = ureg_create(PIPE_SHADER_FRAGMENT); 67 if (!ureg) 68 return NULL; 69 70 dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 71 src = ureg_DECL_immediate(ureg, color, 4); 72 ureg_MOV(ureg, dst, src); 73 ureg_END(ureg); 74 75 tokens = ureg_get_tokens(ureg, &num_tokens); 76 77 ureg_destroy(ureg); 78 79 return tokens; 80 } 81 82 83 static struct svga_shader_variant * 84 translate_fragment_program(struct svga_context *svga, 85 const struct svga_fragment_shader *fs, 86 const struct svga_compile_key *key) 87 { 88 if (svga_have_vgpu10(svga)) { 89 return svga_tgsi_vgpu10_translate(svga, &fs->base, key, 90 PIPE_SHADER_FRAGMENT); 91 } 92 else { 93 return svga_tgsi_vgpu9_translate(svga, &fs->base, key, 94 PIPE_SHADER_FRAGMENT); 95 } 96 } 97 98 99 /** 100 * Replace the given shader's instruction with a simple constant-color 101 * shader. We use this when normal shader translation fails. 102 */ 103 static struct svga_shader_variant * 104 get_compiled_dummy_shader(struct svga_context *svga, 105 struct svga_fragment_shader *fs, 106 const struct svga_compile_key *key) 107 { 108 const struct tgsi_token *dummy = get_dummy_fragment_shader(); 109 struct svga_shader_variant *variant; 110 111 if (!dummy) { 112 return NULL; 113 } 114 115 FREE((void *) fs->base.tokens); 116 fs->base.tokens = dummy; 117 118 variant = translate_fragment_program(svga, fs, key); 119 return variant; 120 } 121 122 123 /** 124 * Translate TGSI shader into an svga shader variant. 125 */ 126 static enum pipe_error 127 compile_fs(struct svga_context *svga, 128 struct svga_fragment_shader *fs, 129 const struct svga_compile_key *key, 130 struct svga_shader_variant **out_variant) 131 { 132 struct svga_shader_variant *variant; 133 enum pipe_error ret = PIPE_ERROR; 134 135 variant = translate_fragment_program(svga, fs, key); 136 if (variant == NULL) { 137 debug_printf("Failed to compile fragment shader," 138 " using dummy shader instead.\n"); 139 variant = get_compiled_dummy_shader(svga, fs, key); 140 } 141 else if (svga_shader_too_large(svga, variant)) { 142 /* too big, use dummy shader */ 143 debug_printf("Shader too large (%u bytes)," 144 " using dummy shader instead.\n", 145 (unsigned) (variant->nr_tokens 146 * sizeof(variant->tokens[0]))); 147 /* Free the too-large variant */ 148 svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); 149 /* Use simple pass-through shader instead */ 150 variant = get_compiled_dummy_shader(svga, fs, key); 151 } 152 153 if (!variant) { 154 return PIPE_ERROR; 155 } 156 157 ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_PS, variant); 158 if (ret != PIPE_OK) { 159 svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_PS, variant); 160 return ret; 161 } 162 163 *out_variant = variant; 164 165 /* insert variant at head of linked list */ 166 variant->next = fs->base.variants; 167 fs->base.variants = variant; 168 169 return PIPE_OK; 170 } 171 172 173 /* SVGA_NEW_TEXTURE_BINDING 174 * SVGA_NEW_RAST 175 * SVGA_NEW_NEED_SWTNL 176 * SVGA_NEW_SAMPLER 177 */ 178 static enum pipe_error 179 make_fs_key(const struct svga_context *svga, 180 struct svga_fragment_shader *fs, 181 struct svga_compile_key *key) 182 { 183 const enum pipe_shader_type shader = PIPE_SHADER_FRAGMENT; 184 unsigned i; 185 186 memset(key, 0, sizeof *key); 187 188 memcpy(key->generic_remap_table, fs->generic_remap_table, 189 sizeof(fs->generic_remap_table)); 190 191 /* SVGA_NEW_GS, SVGA_NEW_VS 192 */ 193 if (svga->curr.gs) { 194 key->fs.gs_generic_outputs = svga->curr.gs->generic_outputs; 195 } else { 196 key->fs.vs_generic_outputs = svga->curr.vs->generic_outputs; 197 } 198 199 /* Only need fragment shader fixup for twoside lighting if doing 200 * hwtnl. Otherwise the draw module does the whole job for us. 201 * 202 * SVGA_NEW_SWTNL 203 */ 204 if (!svga->state.sw.need_swtnl) { 205 /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE 206 */ 207 key->fs.light_twoside = svga->curr.rast->templ.light_twoside; 208 key->fs.front_ccw = svga->curr.rast->templ.front_ccw; 209 key->fs.pstipple = (svga->curr.rast->templ.poly_stipple_enable && 210 svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES); 211 key->fs.aa_point = (svga->curr.rast->templ.point_smooth && 212 svga->curr.reduced_prim == PIPE_PRIM_POINTS && 213 (svga->curr.rast->pointsize > 1.0 || 214 svga->curr.vs->base.info.writes_psize)); 215 if (key->fs.aa_point) { 216 assert(svga->curr.gs != NULL); 217 assert(svga->curr.gs->aa_point_coord_index != -1); 218 key->fs.aa_point_coord_index = svga->curr.gs->aa_point_coord_index; 219 } 220 } 221 222 /* The blend workaround for simulating logicop xor behaviour 223 * requires that the incoming fragment color be white. This change 224 * achieves that by creating a variant of the current fragment 225 * shader that overrides all output colors with 1,1,1,1 226 * 227 * This will work for most shaders, including those containing 228 * TEXKIL and/or depth-write. However, it will break on the 229 * combination of xor-logicop plus alphatest. 230 * 231 * Ultimately, we could implement alphatest in the shader using 232 * texkil prior to overriding the outgoing fragment color. 233 * 234 * SVGA_NEW_BLEND 235 */ 236 if (svga->curr.blend->need_white_fragments) { 237 key->fs.white_fragments = 1; 238 } 239 240 #ifdef DEBUG 241 /* 242 * We expect a consistent set of samplers and sampler views. 243 * Do some debug checks/warnings here. 244 */ 245 { 246 static boolean warned = FALSE; 247 unsigned i, n = MAX2(svga->curr.num_sampler_views[shader], 248 svga->curr.num_samplers[shader]); 249 /* Only warn once to prevent too much debug output */ 250 if (!warned) { 251 if (svga->curr.num_sampler_views[shader] != 252 svga->curr.num_samplers[shader]) { 253 debug_printf("svga: mismatched number of sampler views (%u) " 254 "vs. samplers (%u)\n", 255 svga->curr.num_sampler_views[shader], 256 svga->curr.num_samplers[shader]); 257 } 258 for (i = 0; i < n; i++) { 259 if ((svga->curr.sampler_views[shader][i] == NULL) != 260 (svga->curr.sampler[shader][i] == NULL)) 261 debug_printf("sampler_view[%u] = %p but sampler[%u] = %p\n", 262 i, svga->curr.sampler_views[shader][i], 263 i, svga->curr.sampler[shader][i]); 264 } 265 warned = TRUE; 266 } 267 } 268 #endif 269 270 /* XXX: want to limit this to the textures that the shader actually 271 * refers to. 272 * 273 * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER 274 */ 275 svga_init_shader_key_common(svga, shader, key); 276 277 for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { 278 struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i]; 279 const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i]; 280 if (view) { 281 struct pipe_resource *tex = view->texture; 282 if (tex->target != PIPE_BUFFER) { 283 struct svga_texture *stex = svga_texture(tex); 284 SVGA3dSurfaceFormat format = stex->key.format; 285 286 if (!svga_have_vgpu10(svga) && 287 (format == SVGA3D_Z_D16 || 288 format == SVGA3D_Z_D24X8 || 289 format == SVGA3D_Z_D24S8)) { 290 /* If we're sampling from a SVGA3D_Z_D16, SVGA3D_Z_D24X8, 291 * or SVGA3D_Z_D24S8 surface, we'll automatically get 292 * shadow comparison. But we only get LEQUAL mode. 293 * Set TEX_COMPARE_NONE here so we don't emit the extra FS 294 * code for shadow comparison. 295 */ 296 key->tex[i].compare_mode = PIPE_TEX_COMPARE_NONE; 297 key->tex[i].compare_func = PIPE_FUNC_NEVER; 298 /* These depth formats _only_ support comparison mode and 299 * not ordinary sampling so warn if the later is expected. 300 */ 301 if (sampler->compare_mode != PIPE_TEX_COMPARE_R_TO_TEXTURE) { 302 debug_warn_once("Unsupported shadow compare mode"); 303 } 304 /* The shader translation code can emit code to 305 * handle ALWAYS and NEVER compare functions 306 */ 307 else if (sampler->compare_func == PIPE_FUNC_ALWAYS || 308 sampler->compare_func == PIPE_FUNC_NEVER) { 309 key->tex[i].compare_mode = sampler->compare_mode; 310 key->tex[i].compare_func = sampler->compare_func; 311 } 312 else if (sampler->compare_func != PIPE_FUNC_LEQUAL) { 313 debug_warn_once("Unsupported shadow compare function"); 314 } 315 } 316 else { 317 /* For other texture formats, just use the compare func/mode 318 * as-is. Should be no-ops for color textures. For depth 319 * textures, we do not get automatic depth compare. We have 320 * to do it ourselves in the shader. And we don't get PCF. 321 */ 322 key->tex[i].compare_mode = sampler->compare_mode; 323 key->tex[i].compare_func = sampler->compare_func; 324 } 325 } 326 } 327 } 328 329 /* sprite coord gen state */ 330 for (i = 0; i < svga->curr.num_samplers[shader]; ++i) { 331 key->tex[i].sprite_texgen = 332 svga->curr.rast->templ.sprite_coord_enable & (1 << i); 333 } 334 335 key->sprite_origin_lower_left = (svga->curr.rast->templ.sprite_coord_mode 336 == PIPE_SPRITE_COORD_LOWER_LEFT); 337 338 key->fs.flatshade = svga->curr.rast->templ.flatshade; 339 340 /* SVGA_NEW_DEPTH_STENCIL_ALPHA */ 341 if (svga_have_vgpu10(svga)) { 342 /* Alpha testing is not supported in integer-valued render targets. */ 343 if (svga_has_any_integer_cbufs(svga)) { 344 key->fs.alpha_func = SVGA3D_CMP_ALWAYS; 345 key->fs.alpha_ref = 0; 346 } 347 else { 348 key->fs.alpha_func = svga->curr.depth->alphafunc; 349 key->fs.alpha_ref = svga->curr.depth->alpharef; 350 } 351 } 352 353 /* SVGA_NEW_FRAME_BUFFER */ 354 if (fs->base.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS]) { 355 /* Replicate color0 output to N colorbuffers */ 356 key->fs.write_color0_to_n_cbufs = svga->curr.framebuffer.nr_cbufs; 357 } 358 359 return PIPE_OK; 360 } 361 362 363 /** 364 * svga_reemit_fs_bindings - Reemit the fragment shader bindings 365 */ 366 enum pipe_error 367 svga_reemit_fs_bindings(struct svga_context *svga) 368 { 369 enum pipe_error ret; 370 371 assert(svga->rebind.flags.fs); 372 assert(svga_have_gb_objects(svga)); 373 374 if (!svga->state.hw_draw.fs) 375 return PIPE_OK; 376 377 if (!svga_need_to_rebind_resources(svga)) { 378 ret = svga->swc->resource_rebind(svga->swc, NULL, 379 svga->state.hw_draw.fs->gb_shader, 380 SVGA_RELOC_READ); 381 goto out; 382 } 383 384 if (svga_have_vgpu10(svga)) 385 ret = SVGA3D_vgpu10_SetShader(svga->swc, SVGA3D_SHADERTYPE_PS, 386 svga->state.hw_draw.fs->gb_shader, 387 svga->state.hw_draw.fs->id); 388 else 389 ret = SVGA3D_SetGBShader(svga->swc, SVGA3D_SHADERTYPE_PS, 390 svga->state.hw_draw.fs->gb_shader); 391 392 out: 393 if (ret != PIPE_OK) 394 return ret; 395 396 svga->rebind.flags.fs = FALSE; 397 return PIPE_OK; 398 } 399 400 401 402 static enum pipe_error 403 emit_hw_fs(struct svga_context *svga, unsigned dirty) 404 { 405 struct svga_shader_variant *variant = NULL; 406 enum pipe_error ret = PIPE_OK; 407 struct svga_fragment_shader *fs = svga->curr.fs; 408 struct svga_compile_key key; 409 410 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_EMITFS); 411 412 /* SVGA_NEW_BLEND 413 * SVGA_NEW_TEXTURE_BINDING 414 * SVGA_NEW_RAST 415 * SVGA_NEW_NEED_SWTNL 416 * SVGA_NEW_SAMPLER 417 * SVGA_NEW_FRAME_BUFFER 418 * SVGA_NEW_DEPTH_STENCIL_ALPHA 419 * SVGA_NEW_VS 420 */ 421 ret = make_fs_key(svga, fs, &key); 422 if (ret != PIPE_OK) 423 goto done; 424 425 variant = svga_search_shader_key(&fs->base, &key); 426 if (!variant) { 427 ret = compile_fs(svga, fs, &key, &variant); 428 if (ret != PIPE_OK) 429 goto done; 430 } 431 432 assert(variant); 433 434 if (variant != svga->state.hw_draw.fs) { 435 ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_PS, variant); 436 if (ret != PIPE_OK) 437 goto done; 438 439 svga->rebind.flags.fs = FALSE; 440 441 svga->dirty |= SVGA_NEW_FS_VARIANT; 442 svga->state.hw_draw.fs = variant; 443 } 444 445 done: 446 SVGA_STATS_TIME_POP(svga_sws(svga)); 447 return ret; 448 } 449 450 struct svga_tracked_state svga_hw_fs = 451 { 452 "fragment shader (hwtnl)", 453 (SVGA_NEW_FS | 454 SVGA_NEW_GS | 455 SVGA_NEW_VS | 456 SVGA_NEW_TEXTURE_BINDING | 457 SVGA_NEW_NEED_SWTNL | 458 SVGA_NEW_RAST | 459 SVGA_NEW_STIPPLE | 460 SVGA_NEW_REDUCED_PRIMITIVE | 461 SVGA_NEW_SAMPLER | 462 SVGA_NEW_FRAME_BUFFER | 463 SVGA_NEW_DEPTH_STENCIL_ALPHA | 464 SVGA_NEW_BLEND), 465 emit_hw_fs 466 }; 467 468 469 470