/**********************************************************
 * Copyright 2008-2012 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

#include "util/u_bitmask.h"
#include "util/u_memory.h"
#include "util/u_format.h"
#include "svga_context.h"
#include "svga_cmd.h"
#include "svga_format.h"
#include "svga_shader.h"
#include "svga_resource_texture.h"


/**
 * This bit isn't really used anywhere.  It only serves to help
 * generate a unique "signature" for the vertex shader output bitmask.
 * Shader input/output signatures are used to resolve shader linking
 * issues.
 */
#define FOG_GENERIC_BIT (((uint64_t) 1) << 63)


/**
 * Use the shader info to generate a bitmask indicating which generic
 * inputs are used by the shader.  A set bit indicates that GENERIC[i]
 * is used.
 */
uint64_t
svga_get_generic_inputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_inputs; i++) {
      if (info->input_semantic_name[i] == TGSI_SEMANTIC_GENERIC) {
         unsigned j = info->input_semantic_index[i];
         assert(j < sizeof(mask) * 8);
         mask |= ((uint64_t) 1) << j;
      }
   }

   return mask;
}


/**
 * Scan shader info to return a bitmask of written outputs.
 */
uint64_t
svga_get_generic_outputs_mask(const struct tgsi_shader_info *info)
{
   unsigned i;
   uint64_t mask = 0x0;

   for (i = 0; i < info->num_outputs; i++) {
      switch (info->output_semantic_name[i]) {
      case TGSI_SEMANTIC_GENERIC:
         {
            unsigned j = info->output_semantic_index[i];
            assert(j < sizeof(mask) * 8);
            mask |= ((uint64_t) 1) << j;
         }
         break;
      case TGSI_SEMANTIC_FOG:
         mask |= FOG_GENERIC_BIT;
         break;
      }
   }

   return mask;
}
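
/*
 * Illustrative example (hypothetical shader, not taken from real
 * tgsi_shader_info): for a vertex shader that writes GENERIC[1],
 * GENERIC[3] and FOG, svga_get_generic_outputs_mask() returns
 *
 *    ((uint64_t) 1 << 1) | ((uint64_t) 1 << 3) | FOG_GENERIC_BIT
 *    == 0x800000000000000A
 *
 * which serves as the output "signature" matched against the
 * consumer's input mask during shader linking.
 */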


/**
 * Given a mask of used generic variables (as returned by the functions
 * above) fill in a table which maps those indexes to small integers.
 * This table is used by the remap_generic_index() function in
 * svga_tgsi_decl_sm30.c
 * Example: if generics_mask = binary(1010) it means that GENERIC[1] and
 * GENERIC[3] are used.  Since texcoord[0] is reserved, the remap_table
 * will contain:
 *   table[1] = 1;
 *   table[3] = 2;
 * The remaining table entries are set to -1; svga_remap_generic_index()
 * assigns them the next unused index on demand.
 */
void
svga_remap_generics(uint64_t generics_mask,
                    int8_t remap_table[MAX_GENERIC_VARYING])
{
   /* Note texcoord[0] is reserved so start at 1 */
   unsigned count = 1, i;

   for (i = 0; i < MAX_GENERIC_VARYING; i++) {
      remap_table[i] = -1;
   }

   /* for each bit set in generics_mask */
   while (generics_mask) {
      unsigned index = ffsll(generics_mask) - 1;
      remap_table[index] = count++;
      generics_mask &= ~((uint64_t) 1 << index);
   }
}


/**
 * Use the generic remap table to map a TGSI generic varying variable
 * index to a small integer.  If the remapping table doesn't have a
 * valid value for the given index (the table entry is -1) it means
 * the fragment shader doesn't use that VS output.  Just allocate
 * the next free value in that case.  Alternatively, we could cull
 * VS instructions that write to such a register, or replace the
 * register with a dummy temp register.
 * XXX TODO: we should do one of the latter as it would save precious
 * texcoord registers.
 */
int
svga_remap_generic_index(int8_t remap_table[MAX_GENERIC_VARYING],
                         int generic_index)
{
   assert(generic_index < MAX_GENERIC_VARYING);

   if (generic_index >= MAX_GENERIC_VARYING) {
      /* just don't return a random/garbage value */
      generic_index = MAX_GENERIC_VARYING - 1;
   }

   if (remap_table[generic_index] == -1) {
      /* This is a VS output that has no matching PS input.  Find a
       * free index.
       */
      int i, max = 0;
      for (i = 0; i < MAX_GENERIC_VARYING; i++) {
         max = MAX2(max, remap_table[i]);
      }
      remap_table[generic_index] = max + 1;
   }

   return remap_table[generic_index];
}


/* Identity swizzle table: pass the view's swizzles through unchanged. */
static const enum pipe_swizzle copy_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_W,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};

/* Like copy_alpha, but any component that would read the W (alpha)
 * channel returns 1 instead.
 */
static const enum pipe_swizzle set_alpha[PIPE_SWIZZLE_MAX] = {
   PIPE_SWIZZLE_X,
   PIPE_SWIZZLE_Y,
   PIPE_SWIZZLE_Z,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_0,
   PIPE_SWIZZLE_1,
   PIPE_SWIZZLE_NONE
};
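
/*
 * Worked example for the swizzle-table selection in
 * svga_init_shader_key_common() below (the formats are hypothetical):
 * a PIPE_FORMAT_B8G8R8X8_UNORM view of a surface whose underlying
 * device format does have an alpha channel (e.g. an imported surface).
 * util_format_has_alpha() is false for the view format but
 * svga_texture_device_format_has_alpha() is true, so set_alpha is
 * chosen and an identity XYZW view swizzle is recorded in the key as:
 *
 *    swizzle_r = PIPE_SWIZZLE_X
 *    swizzle_g = PIPE_SWIZZLE_Y
 *    swizzle_b = PIPE_SWIZZLE_Z
 *    swizzle_a = PIPE_SWIZZLE_1   (alpha forced to 1, not read from W)
 */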

/**
 * Initialize the shader-neutral fields of svga_compile_key from context
 * state.  This is basically the texture-related state.
 */
void
svga_init_shader_key_common(const struct svga_context *svga,
                            enum pipe_shader_type shader,
                            struct svga_compile_key *key)
{
   unsigned i, idx = 0;

   assert(shader < ARRAY_SIZE(svga->curr.num_sampler_views));

   /* In case the number of samplers and sampler_views doesn't match,
    * loop over the larger of the two counts; the NULL checks below
    * skip the missing entries.
    */
   key->num_textures = MAX2(svga->curr.num_sampler_views[shader],
                            svga->curr.num_samplers[shader]);

   for (i = 0; i < key->num_textures; i++) {
      struct pipe_sampler_view *view = svga->curr.sampler_views[shader][i];
      const struct svga_sampler_state *sampler = svga->curr.sampler[shader][i];
      if (view) {
         assert(view->texture);
         assert(view->texture->target < (1 << 4)); /* texture_target:4 */

         /* 1D/2D array textures with one slice are treated as non-arrays
          * by the SVGA3D device.  Convert the texture type here so that
          * we emit the right TEX/SAMPLE instruction in the shader.
          */
         if (view->texture->target == PIPE_TEXTURE_1D_ARRAY ||
             view->texture->target == PIPE_TEXTURE_2D_ARRAY) {
            if (view->texture->array_size == 1) {
               key->tex[i].is_array = 0;
            }
            else {
               assert(view->texture->array_size > 1);
               key->tex[i].is_array = 1;
            }
         }

         /* If we have a non-alpha view into an svga3d surface with an
          * alpha channel, then explicitly set the alpha channel to 1
          * when sampling.  Note that we need to check the actual device
          * format to also cover imported-surface cases.
          */
         const enum pipe_swizzle *swizzle_tab =
            (view->texture->target != PIPE_BUFFER &&
             !util_format_has_alpha(view->format) &&
             svga_texture_device_format_has_alpha(view->texture)) ?
            set_alpha : copy_alpha;

         key->tex[i].swizzle_r = swizzle_tab[view->swizzle_r];
         key->tex[i].swizzle_g = swizzle_tab[view->swizzle_g];
         key->tex[i].swizzle_b = swizzle_tab[view->swizzle_b];
         key->tex[i].swizzle_a = swizzle_tab[view->swizzle_a];
      }

      if (sampler) {
         if (!sampler->normalized_coords) {
            assert(idx < (1 << 5)); /* width_height_idx:5 bitfield */
            key->tex[i].width_height_idx = idx++;
            key->tex[i].unnormalized = TRUE;
            ++key->num_unnormalized_coords;

            if (sampler->magfilter == SVGA3D_TEX_FILTER_NEAREST ||
                sampler->minfilter == SVGA3D_TEX_FILTER_NEAREST) {
               key->tex[i].texel_bias = TRUE;
            }
         }
      }
   }
}


/** Search for a compiled shader variant with the same compile key */
struct svga_shader_variant *
svga_search_shader_key(const struct svga_shader *shader,
                       const struct svga_compile_key *key)
{
   struct svga_shader_variant *variant = shader->variants;

   assert(key);

   for ( ; variant; variant = variant->next) {
      if (svga_compile_keys_equal(key, &variant->key))
         return variant;
   }
   return NULL;
}


/** Search for a shader with the same token key */
struct svga_shader *
svga_search_shader_token_key(struct svga_shader *pshader,
                             const struct svga_token_key *key)
{
   struct svga_shader *shader = pshader;

   assert(key);

   for ( ; shader; shader = shader->next) {
      if (memcmp(key, &shader->token_key, sizeof(struct svga_token_key)) == 0)
         return shader;
   }
   return NULL;
}


/**
 * Helper function to define a gb shader for a non-vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu9(struct svga_context *svga,
                       SVGA3dShaderType type,
                       struct svga_shader_variant *variant,
                       unsigned codeLen)
{
   struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
   enum pipe_error ret;

   /**
    * Create gb memory for the shader and upload the shader code.
    * The kernel module will allocate an id for the shader and issue
    * the DefineGBShader command.
    */
   variant->gb_shader = sws->shader_create(sws, type,
                                           variant->tokens, codeLen);

   if (!variant->gb_shader)
      return PIPE_ERROR_OUT_OF_MEMORY;

   ret = SVGA3D_BindGBShader(svga->swc, variant->gb_shader);

   return ret;
}

/**
 * Helper function to define a gb shader for a vgpu10 device.
 */
static enum pipe_error
define_gb_shader_vgpu10(struct svga_context *svga,
                        SVGA3dShaderType type,
                        struct svga_shader_variant *variant,
                        unsigned codeLen)
{
   struct svga_winsys_context *swc = svga->swc;
   enum pipe_error ret;

   /**
    * Shaders in a vgpu10-enabled device reside in the device COTable.
    * The SVGA driver will allocate an integer ID for the shader and
    * issue the DXDefineShader and DXBindShader commands.
    */
   variant->id = util_bitmask_add(svga->shader_id_bm);
   if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
      return PIPE_ERROR_OUT_OF_MEMORY;
   }

   /* Create gb memory for the shader and upload the shader code */
   variant->gb_shader = swc->shader_create(swc,
                                           variant->id, type,
                                           variant->tokens, codeLen);

   if (!variant->gb_shader) {
      /* Free the shader ID */
      assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
      goto fail_no_allocation;
   }

   /**
    * Since we don't want to do any flush within state emission to avoid
    * partial state in a command buffer, it's important to make sure that
    * there is enough room to send both the DXDefineShader & DXBindShader
    * commands in the same command buffer.  So let's send both
    * commands in one command reservation.  If it fails, we'll undo
    * the shader creation and return an error.
    */
   ret = SVGA3D_vgpu10_DefineAndBindShader(swc, variant->gb_shader,
                                           variant->id, type, codeLen);

   if (ret != PIPE_OK)
      goto fail;

   return PIPE_OK;

fail:
   swc->shader_destroy(swc, variant->gb_shader);
   variant->gb_shader = NULL;

fail_no_allocation:
   util_bitmask_clear(svga->shader_id_bm, variant->id);
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   return PIPE_ERROR_OUT_OF_MEMORY;
}


/**
 * Issue the SVGA3D commands to define a new shader.
 * \param variant  contains the shader tokens, etc.  The variant->id
 *                 field will be set here.
 */
enum pipe_error
svga_define_shader(struct svga_context *svga,
                   SVGA3dShaderType type,
                   struct svga_shader_variant *variant)
{
   unsigned codeLen = variant->nr_tokens * sizeof(variant->tokens[0]);
   enum pipe_error ret;

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_DEFINESHADER);

   variant->id = UTIL_BITMASK_INVALID_INDEX;

   if (svga_have_gb_objects(svga)) {
      if (svga_have_vgpu10(svga))
         ret = define_gb_shader_vgpu10(svga, type, variant, codeLen);
      else
         ret = define_gb_shader_vgpu9(svga, type, variant, codeLen);
   }
   else {
      /* Allocate an integer ID for the shader */
      variant->id = util_bitmask_add(svga->shader_id_bm);
      if (variant->id == UTIL_BITMASK_INVALID_INDEX) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto done;
      }

      /* Issue SVGA3D device command to define the shader */
      ret = SVGA3D_DefineShader(svga->swc,
                                variant->id,
                                type,
                                variant->tokens,
                                codeLen);
      if (ret != PIPE_OK) {
         /* free the ID */
         assert(variant->id != UTIL_BITMASK_INVALID_INDEX);
         util_bitmask_clear(svga->shader_id_bm, variant->id);
         variant->id = UTIL_BITMASK_INVALID_INDEX;
      }
   }

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return ret;
}
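
/*
 * Typical variant lifecycle, for reference (error handling elided; the
 * caller and shader type are hypothetical; all three functions are
 * defined in this file):
 *
 *    ret = svga_define_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
 *    ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, variant);
 *    ...
 *    ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_VS, NULL);
 *    ret = svga_destroy_shader_variant(svga, SVGA3D_SHADERTYPE_VS, variant);
 */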

/**
 * Issue the SVGA3D commands to set/bind a shader.
 * \param variant  the shader variant to bind, or NULL to unbind.
 */
enum pipe_error
svga_set_shader(struct svga_context *svga,
                SVGA3dShaderType type,
                struct svga_shader_variant *variant)
{
   enum pipe_error ret;
   unsigned id = variant ? variant->id : SVGA3D_INVALID_ID;

   assert(type == SVGA3D_SHADERTYPE_VS ||
          type == SVGA3D_SHADERTYPE_GS ||
          type == SVGA3D_SHADERTYPE_PS);

   if (svga_have_gb_objects(svga)) {
      struct svga_winsys_gb_shader *gbshader =
         variant ? variant->gb_shader : NULL;

      if (svga_have_vgpu10(svga))
         ret = SVGA3D_vgpu10_SetShader(svga->swc, type, gbshader, id);
      else
         ret = SVGA3D_SetGBShader(svga->swc, type, gbshader);
   }
   else {
      ret = SVGA3D_SetShader(svga->swc, type, id);
   }

   return ret;
}


struct svga_shader_variant *
svga_new_shader_variant(struct svga_context *svga)
{
   svga->hud.num_shaders++;
   return CALLOC_STRUCT(svga_shader_variant);
}


enum pipe_error
svga_destroy_shader_variant(struct svga_context *svga,
                            SVGA3dShaderType type,
                            struct svga_shader_variant *variant)
{
   enum pipe_error ret = PIPE_OK;

   if (svga_have_gb_objects(svga) && variant->gb_shader) {
      if (svga_have_vgpu10(svga)) {
         struct svga_winsys_context *swc = svga->swc;
         swc->shader_destroy(swc, variant->gb_shader);
         ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
         if (ret != PIPE_OK) {
            /* flush and try again */
            svga_context_flush(svga, NULL);
            ret = SVGA3D_vgpu10_DestroyShader(svga->swc, variant->id);
         }
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
      else {
         struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
         sws->shader_destroy(sws, variant->gb_shader);
      }
      variant->gb_shader = NULL;
   }
   else {
      if (variant->id != UTIL_BITMASK_INVALID_INDEX) {
         ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
         if (ret != PIPE_OK) {
            /* flush and try again */
            svga_context_flush(svga, NULL);
            ret = SVGA3D_DestroyShader(svga->swc, variant->id, type);
            assert(ret == PIPE_OK);
         }
         util_bitmask_clear(svga->shader_id_bm, variant->id);
      }
   }

   FREE((unsigned *) variant->tokens);
   FREE(variant);

   svga->hud.num_shaders--;

   return ret;
}


/*
 * Rebind shaders.
 * Called at the beginning of every new command buffer to ensure that
 * shaders are properly paged-in.  Instead of sending the SetShader
 * command, this function sends a private allocation command to
 * page in a shader.  This avoids emitting redundant state to the device
 * just to page in a resource.
 */
enum pipe_error
svga_rebind_shaders(struct svga_context *svga)
{
   struct svga_winsys_context *swc = svga->swc;
   struct svga_hw_draw_state *hw = &svga->state.hw_draw;
   enum pipe_error ret;

   assert(svga_have_vgpu10(svga));

   /**
    * If the underlying winsys layer does not need resource rebinding,
    * just clear the rebind flags and return.
    */
   if (swc->resource_rebind == NULL) {
      svga->rebind.flags.vs = 0;
      svga->rebind.flags.gs = 0;
      svga->rebind.flags.fs = 0;

      return PIPE_OK;
   }

   if (svga->rebind.flags.vs && hw->vs && hw->vs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->vs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.vs = 0;

   if (svga->rebind.flags.gs && hw->gs && hw->gs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->gs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.gs = 0;

   if (svga->rebind.flags.fs && hw->fs && hw->fs->gb_shader) {
      ret = swc->resource_rebind(swc, NULL, hw->fs->gb_shader, SVGA_RELOC_READ);
      if (ret != PIPE_OK)
         return ret;
   }
   svga->rebind.flags.fs = 0;

   return PIPE_OK;
}