1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file link_varyings.cpp 26 * 27 * Linker functions related specifically to linking varyings between shader 28 * stages. 29 */ 30 31 32 #include "main/mtypes.h" 33 #include "glsl_symbol_table.h" 34 #include "glsl_parser_extras.h" 35 #include "ir_optimization.h" 36 #include "linker.h" 37 #include "link_varyings.h" 38 #include "main/macros.h" 39 #include "util/hash_table.h" 40 #include "program.h" 41 42 43 /** 44 * Get the varying type stripped of the outermost array if we're processing 45 * a stage whose varyings are arrays indexed by a vertex number (such as 46 * geometry shader inputs). 47 */ 48 static const glsl_type * 49 get_varying_type(const ir_variable *var, gl_shader_stage stage) 50 { 51 const glsl_type *type = var->type; 52 53 if (!var->data.patch && 54 ((var->data.mode == ir_var_shader_out && 55 stage == MESA_SHADER_TESS_CTRL) || 56 (var->data.mode == ir_var_shader_in && 57 (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || 58 stage == MESA_SHADER_GEOMETRY)))) { 59 assert(type->is_array()); 60 type = type->fields.array; 61 } 62 63 return type; 64 } 65 66 static void 67 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, 68 size_t name_length, unsigned *count, 69 const char *ifc_member_name, 70 const glsl_type *ifc_member_t, char ***varying_names) 71 { 72 if (t->is_interface()) { 73 size_t new_length = name_length; 74 75 assert(ifc_member_name && ifc_member_t); 76 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name); 77 78 create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, 79 NULL, NULL, varying_names); 80 } else if (t->is_record()) { 81 for (unsigned i = 0; i < t->length; i++) { 82 const char *field = t->fields.structure[i].name; 83 size_t new_length = name_length; 84 85 ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); 86 87 create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name, 88 new_length, count, NULL, NULL, 89 varying_names); 90 } 91 } else if (t->without_array()->is_record() || 92 t->without_array()->is_interface() || 93 (t->is_array() && t->fields.array->is_array())) { 94 for (unsigned i = 0; i < t->length; i++) { 95 size_t new_length = name_length; 96 97 /* Append the subscript to the current variable name */ 98 ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); 99 100 create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length, 101 count, ifc_member_name, ifc_member_t, 102 varying_names); 103 } 104 } else { 105 (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name); 106 } 107 } 108 109 bool 110 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, 111 unsigned *num_tfeedback_decls, 112 char ***varying_names) 113 { 114 bool has_xfb_qualifiers = false; 115 116 /* We still need to enable transform feedback mode even if xfb_stride is 117 * only applied to a global out. Also we don't bother to propagate 118 * xfb_stride to interface block members so this will catch that case also. 119 */ 120 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 121 if (sh->info.TransformFeedback.BufferStride[j]) { 122 has_xfb_qualifiers = true; 123 } 124 } 125 126 foreach_in_list(ir_instruction, node, sh->ir) { 127 ir_variable *var = node->as_variable(); 128 if (!var || var->data.mode != ir_var_shader_out) 129 continue; 130 131 /* From the ARB_enhanced_layouts spec: 132 * 133 * "Any shader making any static use (after preprocessing) of any of 134 * these *xfb_* qualifiers will cause the shader to be in a 135 * transform feedback capturing mode and hence responsible for 136 * describing the transform feedback setup. This mode will capture 137 * any output selected by *xfb_offset*, directly or indirectly, to 138 * a transform feedback buffer." 139 */ 140 if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) { 141 has_xfb_qualifiers = true; 142 } 143 144 if (var->data.explicit_xfb_offset) { 145 *num_tfeedback_decls += var->type->varying_count(); 146 has_xfb_qualifiers = true; 147 } 148 } 149 150 if (*num_tfeedback_decls == 0) 151 return has_xfb_qualifiers; 152 153 unsigned i = 0; 154 *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls); 155 foreach_in_list(ir_instruction, node, sh->ir) { 156 ir_variable *var = node->as_variable(); 157 if (!var || var->data.mode != ir_var_shader_out) 158 continue; 159 160 if (var->data.explicit_xfb_offset) { 161 char *name; 162 const glsl_type *type, *member_type; 163 164 if (var->data.from_named_ifc_block) { 165 type = var->get_interface_type(); 166 /* Find the member type before it was altered by lowering */ 167 member_type = 168 type->fields.structure[type->field_index(var->name)].type; 169 name = ralloc_strdup(NULL, type->without_array()->name); 170 } else { 171 type = var->type; 172 member_type = NULL; 173 name = ralloc_strdup(NULL, var->name); 174 } 175 create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i, 176 var->name, member_type, varying_names); 177 ralloc_free(name); 178 } 179 } 180 181 assert(i == *num_tfeedback_decls); 182 return has_xfb_qualifiers; 183 } 184 185 static bool 186 anonymous_struct_type_matches(const glsl_type *output_type, 187 const glsl_type *to_match) 188 { 189 while (output_type->is_array() && to_match->is_array()) { 190 /* if the lengths at each level don't match fail. */ 191 if (output_type->length != to_match->length) 192 return false; 193 output_type = output_type->fields.array; 194 to_match = to_match->fields.array; 195 } 196 197 if (output_type->is_array() || to_match->is_array()) 198 return false; 199 return output_type->is_anonymous() && 200 to_match->is_anonymous() && 201 to_match->record_compare(output_type); 202 } 203 204 /** 205 * Validate the types and qualifiers of an output from one stage against the 206 * matching input to another stage. 207 */ 208 static void 209 cross_validate_types_and_qualifiers(struct gl_shader_program *prog, 210 const ir_variable *input, 211 const ir_variable *output, 212 gl_shader_stage consumer_stage, 213 gl_shader_stage producer_stage) 214 { 215 /* Check that the types match between stages. 216 */ 217 const glsl_type *type_to_match = input->type; 218 219 /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ 220 const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX && 221 consumer_stage != MESA_SHADER_FRAGMENT) || 222 consumer_stage == MESA_SHADER_GEOMETRY; 223 if (extra_array_level) { 224 assert(type_to_match->is_array()); 225 type_to_match = type_to_match->fields.array; 226 } 227 228 if (type_to_match != output->type) { 229 /* There is a bit of a special case for gl_TexCoord. This 230 * built-in is unsized by default. Applications that variable 231 * access it must redeclare it with a size. There is some 232 * language in the GLSL spec that implies the fragment shader 233 * and vertex shader do not have to agree on this size. Other 234 * driver behave this way, and one or two applications seem to 235 * rely on it. 236 * 237 * Neither declaration needs to be modified here because the array 238 * sizes are fixed later when update_array_sizes is called. 239 * 240 * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: 241 * 242 * "Unlike user-defined varying variables, the built-in 243 * varying variables don't have a strict one-to-one 244 * correspondence between the vertex language and the 245 * fragment language." 246 */ 247 if (!output->type->is_array() || !is_gl_identifier(output->name)) { 248 bool anon_matches = anonymous_struct_type_matches(output->type, type_to_match); 249 250 if (!anon_matches) { 251 linker_error(prog, 252 "%s shader output `%s' declared as type `%s', " 253 "but %s shader input declared as type `%s'\n", 254 _mesa_shader_stage_to_string(producer_stage), 255 output->name, 256 output->type->name, 257 _mesa_shader_stage_to_string(consumer_stage), 258 input->type->name); 259 return; 260 } 261 } 262 } 263 264 /* Check that all of the qualifiers match between stages. 265 */ 266 267 /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier 268 * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0 269 * conformance test suite does not verify that the qualifiers must match. 270 * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for 271 * OpenGLES 3.0 drivers, so we relax the checking in all cases. 272 */ 273 if (false /* always skip the centroid check */ && 274 prog->data->Version < (prog->IsES ? 310 : 430) && 275 input->data.centroid != output->data.centroid) { 276 linker_error(prog, 277 "%s shader output `%s' %s centroid qualifier, " 278 "but %s shader input %s centroid qualifier\n", 279 _mesa_shader_stage_to_string(producer_stage), 280 output->name, 281 (output->data.centroid) ? "has" : "lacks", 282 _mesa_shader_stage_to_string(consumer_stage), 283 (input->data.centroid) ? "has" : "lacks"); 284 return; 285 } 286 287 if (input->data.sample != output->data.sample) { 288 linker_error(prog, 289 "%s shader output `%s' %s sample qualifier, " 290 "but %s shader input %s sample qualifier\n", 291 _mesa_shader_stage_to_string(producer_stage), 292 output->name, 293 (output->data.sample) ? "has" : "lacks", 294 _mesa_shader_stage_to_string(consumer_stage), 295 (input->data.sample) ? "has" : "lacks"); 296 return; 297 } 298 299 if (input->data.patch != output->data.patch) { 300 linker_error(prog, 301 "%s shader output `%s' %s patch qualifier, " 302 "but %s shader input %s patch qualifier\n", 303 _mesa_shader_stage_to_string(producer_stage), 304 output->name, 305 (output->data.patch) ? "has" : "lacks", 306 _mesa_shader_stage_to_string(consumer_stage), 307 (input->data.patch) ? "has" : "lacks"); 308 return; 309 } 310 311 /* The GLSL 4.30 and GLSL ES 3.00 specifications say: 312 * 313 * "As only outputs need be declared with invariant, an output from 314 * one shader stage will still match an input of a subsequent stage 315 * without the input being declared as invariant." 316 * 317 * while GLSL 4.20 says: 318 * 319 * "For variables leaving one shader and coming into another shader, 320 * the invariant keyword has to be used in both shaders, or a link 321 * error will result." 322 * 323 * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: 324 * 325 * "The invariance of varyings that are declared in both the vertex 326 * and fragment shaders must match." 327 */ 328 if (input->data.invariant != output->data.invariant && 329 prog->data->Version < (prog->IsES ? 300 : 430)) { 330 linker_error(prog, 331 "%s shader output `%s' %s invariant qualifier, " 332 "but %s shader input %s invariant qualifier\n", 333 _mesa_shader_stage_to_string(producer_stage), 334 output->name, 335 (output->data.invariant) ? "has" : "lacks", 336 _mesa_shader_stage_to_string(consumer_stage), 337 (input->data.invariant) ? "has" : "lacks"); 338 return; 339 } 340 341 /* GLSL >= 4.40 removes text requiring interpolation qualifiers 342 * to match cross stage, they must only match within the same stage. 343 * 344 * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: 345 * 346 * "It is a link-time error if, within the same stage, the interpolation 347 * qualifiers of variables of the same name do not match. 348 * 349 */ 350 if (input->data.interpolation != output->data.interpolation && 351 prog->data->Version < 440) { 352 linker_error(prog, 353 "%s shader output `%s' specifies %s " 354 "interpolation qualifier, " 355 "but %s shader input specifies %s " 356 "interpolation qualifier\n", 357 _mesa_shader_stage_to_string(producer_stage), 358 output->name, 359 interpolation_string(output->data.interpolation), 360 _mesa_shader_stage_to_string(consumer_stage), 361 interpolation_string(input->data.interpolation)); 362 return; 363 } 364 } 365 366 /** 367 * Validate front and back color outputs against single color input 368 */ 369 static void 370 cross_validate_front_and_back_color(struct gl_shader_program *prog, 371 const ir_variable *input, 372 const ir_variable *front_color, 373 const ir_variable *back_color, 374 gl_shader_stage consumer_stage, 375 gl_shader_stage producer_stage) 376 { 377 if (front_color != NULL && front_color->data.assigned) 378 cross_validate_types_and_qualifiers(prog, input, front_color, 379 consumer_stage, producer_stage); 380 381 if (back_color != NULL && back_color->data.assigned) 382 cross_validate_types_and_qualifiers(prog, input, back_color, 383 consumer_stage, producer_stage); 384 } 385 386 /** 387 * Validate that outputs from one stage match inputs of another 388 */ 389 void 390 cross_validate_outputs_to_inputs(struct gl_shader_program *prog, 391 gl_linked_shader *producer, 392 gl_linked_shader *consumer) 393 { 394 glsl_symbol_table parameters; 395 ir_variable *explicit_locations[MAX_VARYINGS_INCL_PATCH][4] = 396 { {NULL, NULL} }; 397 398 /* Find all shader outputs in the "producer" stage. 399 */ 400 foreach_in_list(ir_instruction, node, producer->ir) { 401 ir_variable *const var = node->as_variable(); 402 403 if (var == NULL || var->data.mode != ir_var_shader_out) 404 continue; 405 406 if (!var->data.explicit_location 407 || var->data.location < VARYING_SLOT_VAR0) 408 parameters.add_variable(var); 409 else { 410 /* User-defined varyings with explicit locations are handled 411 * differently because they do not need to have matching names. 412 */ 413 const glsl_type *type = get_varying_type(var, producer->Stage); 414 unsigned num_elements = type->count_attribute_slots(false); 415 unsigned idx = var->data.location - VARYING_SLOT_VAR0; 416 unsigned slot_limit = idx + num_elements; 417 unsigned last_comp; 418 419 if (type->without_array()->is_record()) { 420 /* The component qualifier can't be used on structs so just treat 421 * all component slots as used. 422 */ 423 last_comp = 4; 424 } else { 425 unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; 426 last_comp = var->data.location_frac + 427 type->without_array()->vector_elements * dmul; 428 } 429 430 while (idx < slot_limit) { 431 unsigned i = var->data.location_frac; 432 while (i < last_comp) { 433 if (explicit_locations[idx][i] != NULL) { 434 linker_error(prog, 435 "%s shader has multiple outputs explicitly " 436 "assigned to location %d and component %d\n", 437 _mesa_shader_stage_to_string(producer->Stage), 438 idx, var->data.location_frac); 439 return; 440 } 441 442 /* Make sure all component at this location have the same type. 443 */ 444 for (unsigned j = 0; j < 4; j++) { 445 if (explicit_locations[idx][j] && 446 (explicit_locations[idx][j]->type->without_array() 447 ->base_type != type->without_array()->base_type)) { 448 linker_error(prog, 449 "Varyings sharing the same location must " 450 "have the same underlying numerical type. " 451 "Location %u component %u\n", idx, 452 var->data.location_frac); 453 return; 454 } 455 } 456 457 explicit_locations[idx][i] = var; 458 i++; 459 460 /* We need to do some special handling for doubles as dvec3 and 461 * dvec4 consume two consecutive locations. We don't need to 462 * worry about components beginning at anything other than 0 as 463 * the spec does not allow this for dvec3 and dvec4. 464 */ 465 if (i == 4 && last_comp > 4) { 466 last_comp = last_comp - 4; 467 /* Bump location index and reset the component index */ 468 idx++; 469 i = 0; 470 } 471 } 472 idx++; 473 } 474 } 475 } 476 477 478 /* Find all shader inputs in the "consumer" stage. Any variables that have 479 * matching outputs already in the symbol table must have the same type and 480 * qualifiers. 481 * 482 * Exception: if the consumer is the geometry shader, then the inputs 483 * should be arrays and the type of the array element should match the type 484 * of the corresponding producer output. 485 */ 486 foreach_in_list(ir_instruction, node, consumer->ir) { 487 ir_variable *const input = node->as_variable(); 488 489 if (input == NULL || input->data.mode != ir_var_shader_in) 490 continue; 491 492 if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { 493 const ir_variable *const front_color = 494 parameters.get_variable("gl_FrontColor"); 495 496 const ir_variable *const back_color = 497 parameters.get_variable("gl_BackColor"); 498 499 cross_validate_front_and_back_color(prog, input, 500 front_color, back_color, 501 consumer->Stage, producer->Stage); 502 } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { 503 const ir_variable *const front_color = 504 parameters.get_variable("gl_FrontSecondaryColor"); 505 506 const ir_variable *const back_color = 507 parameters.get_variable("gl_BackSecondaryColor"); 508 509 cross_validate_front_and_back_color(prog, input, 510 front_color, back_color, 511 consumer->Stage, producer->Stage); 512 } else { 513 /* The rules for connecting inputs and outputs change in the presence 514 * of explicit locations. In this case, we no longer care about the 515 * names of the variables. Instead, we care only about the 516 * explicitly assigned location. 517 */ 518 ir_variable *output = NULL; 519 if (input->data.explicit_location 520 && input->data.location >= VARYING_SLOT_VAR0) { 521 522 const glsl_type *type = get_varying_type(input, consumer->Stage); 523 unsigned num_elements = type->count_attribute_slots(false); 524 unsigned idx = input->data.location - VARYING_SLOT_VAR0; 525 unsigned slot_limit = idx + num_elements; 526 527 while (idx < slot_limit) { 528 output = explicit_locations[idx][input->data.location_frac]; 529 530 if (output == NULL || 531 input->data.location != output->data.location) { 532 linker_error(prog, 533 "%s shader input `%s' with explicit location " 534 "has no matching output\n", 535 _mesa_shader_stage_to_string(consumer->Stage), 536 input->name); 537 break; 538 } 539 idx++; 540 } 541 } else { 542 output = parameters.get_variable(input->name); 543 } 544 545 if (output != NULL) { 546 /* Interface blocks have their own validation elsewhere so don't 547 * try validating them here. 548 */ 549 if (!(input->get_interface_type() && 550 output->get_interface_type())) 551 cross_validate_types_and_qualifiers(prog, input, output, 552 consumer->Stage, 553 producer->Stage); 554 } else { 555 /* Check for input vars with unmatched output vars in prev stage 556 * taking into account that interface blocks could have a matching 557 * output but with different name, so we ignore them. 558 */ 559 assert(!input->data.assigned); 560 if (input->data.used && !input->get_interface_type() && 561 !input->data.explicit_location && !prog->SeparateShader) 562 linker_error(prog, 563 "%s shader input `%s' " 564 "has no matching output in the previous stage\n", 565 _mesa_shader_stage_to_string(consumer->Stage), 566 input->name); 567 } 568 } 569 } 570 } 571 572 /** 573 * Demote shader inputs and outputs that are not used in other stages, and 574 * remove them via dead code elimination. 575 */ 576 void 577 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, 578 gl_linked_shader *sh, 579 enum ir_variable_mode mode) 580 { 581 if (is_separate_shader_object) 582 return; 583 584 foreach_in_list(ir_instruction, node, sh->ir) { 585 ir_variable *const var = node->as_variable(); 586 587 if (var == NULL || var->data.mode != int(mode)) 588 continue; 589 590 /* A shader 'in' or 'out' variable is only really an input or output if 591 * its value is used by other shader stages. This will cause the 592 * variable to have a location assigned. 593 */ 594 if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) { 595 assert(var->data.mode != ir_var_temporary); 596 597 /* Assign zeros to demoted inputs to allow more optimizations. */ 598 if (var->data.mode == ir_var_shader_in && !var->constant_value) 599 var->constant_value = ir_constant::zero(var, var->type); 600 601 var->data.mode = ir_var_auto; 602 } 603 } 604 605 /* Eliminate code that is now dead due to unused inputs/outputs being 606 * demoted. 607 */ 608 while (do_dead_code(sh->ir, false)) 609 ; 610 611 } 612 613 /** 614 * Initialize this object based on a string that was passed to 615 * glTransformFeedbackVaryings. 616 * 617 * If the input is mal-formed, this call still succeeds, but it sets 618 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() 619 * will fail to find any matching variable. 620 */ 621 void 622 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, 623 const char *input) 624 { 625 /* We don't have to be pedantic about what is a valid GLSL variable name, 626 * because any variable with an invalid name can't exist in the IR anyway. 627 */ 628 629 this->location = -1; 630 this->orig_name = input; 631 this->lowered_builtin_array_variable = none; 632 this->skip_components = 0; 633 this->next_buffer_separator = false; 634 this->matched_candidate = NULL; 635 this->stream_id = 0; 636 this->buffer = 0; 637 this->offset = 0; 638 639 if (ctx->Extensions.ARB_transform_feedback3) { 640 /* Parse gl_NextBuffer. */ 641 if (strcmp(input, "gl_NextBuffer") == 0) { 642 this->next_buffer_separator = true; 643 return; 644 } 645 646 /* Parse gl_SkipComponents. */ 647 if (strcmp(input, "gl_SkipComponents1") == 0) 648 this->skip_components = 1; 649 else if (strcmp(input, "gl_SkipComponents2") == 0) 650 this->skip_components = 2; 651 else if (strcmp(input, "gl_SkipComponents3") == 0) 652 this->skip_components = 3; 653 else if (strcmp(input, "gl_SkipComponents4") == 0) 654 this->skip_components = 4; 655 656 if (this->skip_components) 657 return; 658 } 659 660 /* Parse a declaration. */ 661 const char *base_name_end; 662 long subscript = parse_program_resource_name(input, &base_name_end); 663 this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); 664 if (this->var_name == NULL) { 665 _mesa_error_no_memory(__func__); 666 return; 667 } 668 669 if (subscript >= 0) { 670 this->array_subscript = subscript; 671 this->is_subscripted = true; 672 } else { 673 this->is_subscripted = false; 674 } 675 676 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this 677 * class must behave specially to account for the fact that gl_ClipDistance 678 * is converted from a float[8] to a vec4[2]. 679 */ 680 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 681 strcmp(this->var_name, "gl_ClipDistance") == 0) { 682 this->lowered_builtin_array_variable = clip_distance; 683 } 684 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 685 strcmp(this->var_name, "gl_CullDistance") == 0) { 686 this->lowered_builtin_array_variable = cull_distance; 687 } 688 689 if (ctx->Const.LowerTessLevel && 690 (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) 691 this->lowered_builtin_array_variable = tess_level_outer; 692 if (ctx->Const.LowerTessLevel && 693 (strcmp(this->var_name, "gl_TessLevelInner") == 0)) 694 this->lowered_builtin_array_variable = tess_level_inner; 695 } 696 697 698 /** 699 * Determine whether two tfeedback_decl objects refer to the same variable and 700 * array index (if applicable). 701 */ 702 bool 703 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) 704 { 705 assert(x.is_varying() && y.is_varying()); 706 707 if (strcmp(x.var_name, y.var_name) != 0) 708 return false; 709 if (x.is_subscripted != y.is_subscripted) 710 return false; 711 if (x.is_subscripted && x.array_subscript != y.array_subscript) 712 return false; 713 return true; 714 } 715 716 717 /** 718 * Assign a location and stream ID for this tfeedback_decl object based on the 719 * transform feedback candidate found by find_candidate. 720 * 721 * If an error occurs, the error is reported through linker_error() and false 722 * is returned. 723 */ 724 bool 725 tfeedback_decl::assign_location(struct gl_context *ctx, 726 struct gl_shader_program *prog) 727 { 728 assert(this->is_varying()); 729 730 unsigned fine_location 731 = this->matched_candidate->toplevel_var->data.location * 4 732 + this->matched_candidate->toplevel_var->data.location_frac 733 + this->matched_candidate->offset; 734 const unsigned dmul = 735 this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1; 736 737 if (this->matched_candidate->type->is_array()) { 738 /* Array variable */ 739 const unsigned matrix_cols = 740 this->matched_candidate->type->fields.array->matrix_columns; 741 const unsigned vector_elements = 742 this->matched_candidate->type->fields.array->vector_elements; 743 unsigned actual_array_size; 744 switch (this->lowered_builtin_array_variable) { 745 case clip_distance: 746 actual_array_size = prog->LastClipDistanceArraySize; 747 break; 748 case cull_distance: 749 actual_array_size = prog->LastCullDistanceArraySize; 750 break; 751 case tess_level_outer: 752 actual_array_size = 4; 753 break; 754 case tess_level_inner: 755 actual_array_size = 2; 756 break; 757 case none: 758 default: 759 actual_array_size = this->matched_candidate->type->array_size(); 760 break; 761 } 762 763 if (this->is_subscripted) { 764 /* Check array bounds. */ 765 if (this->array_subscript >= actual_array_size) { 766 linker_error(prog, "Transform feedback varying %s has index " 767 "%i, but the array size is %u.", 768 this->orig_name, this->array_subscript, 769 actual_array_size); 770 return false; 771 } 772 unsigned array_elem_size = this->lowered_builtin_array_variable ? 773 1 : vector_elements * matrix_cols * dmul; 774 fine_location += array_elem_size * this->array_subscript; 775 this->size = 1; 776 } else { 777 this->size = actual_array_size; 778 } 779 this->vector_elements = vector_elements; 780 this->matrix_columns = matrix_cols; 781 if (this->lowered_builtin_array_variable) 782 this->type = GL_FLOAT; 783 else 784 this->type = this->matched_candidate->type->fields.array->gl_type; 785 } else { 786 /* Regular variable (scalar, vector, or matrix) */ 787 if (this->is_subscripted) { 788 linker_error(prog, "Transform feedback varying %s requested, " 789 "but %s is not an array.", 790 this->orig_name, this->var_name); 791 return false; 792 } 793 this->size = 1; 794 this->vector_elements = this->matched_candidate->type->vector_elements; 795 this->matrix_columns = this->matched_candidate->type->matrix_columns; 796 this->type = this->matched_candidate->type->gl_type; 797 } 798 this->location = fine_location / 4; 799 this->location_frac = fine_location % 4; 800 801 /* From GL_EXT_transform_feedback: 802 * A program will fail to link if: 803 * 804 * * the total number of components to capture in any varying 805 * variable in <varyings> is greater than the constant 806 * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the 807 * buffer mode is SEPARATE_ATTRIBS_EXT; 808 */ 809 if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 810 this->num_components() > 811 ctx->Const.MaxTransformFeedbackSeparateComponents) { 812 linker_error(prog, "Transform feedback varying %s exceeds " 813 "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", 814 this->orig_name); 815 return false; 816 } 817 818 /* Only transform feedback varyings can be assigned to non-zero streams, 819 * so assign the stream id here. 820 */ 821 this->stream_id = this->matched_candidate->toplevel_var->data.stream; 822 823 unsigned array_offset = this->array_subscript * 4 * dmul; 824 unsigned struct_offset = this->matched_candidate->offset * 4 * dmul; 825 this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer; 826 this->offset = this->matched_candidate->toplevel_var->data.offset + 827 array_offset + struct_offset; 828 829 return true; 830 } 831 832 833 unsigned 834 tfeedback_decl::get_num_outputs() const 835 { 836 if (!this->is_varying()) { 837 return 0; 838 } 839 return (this->num_components() + this->location_frac + 3)/4; 840 } 841 842 843 /** 844 * Update gl_transform_feedback_info to reflect this tfeedback_decl. 845 * 846 * If an error occurs, the error is reported through linker_error() and false 847 * is returned. 848 */ 849 bool 850 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, 851 struct gl_transform_feedback_info *info, 852 unsigned buffer, unsigned buffer_index, 853 const unsigned max_outputs, bool *explicit_stride, 854 bool has_xfb_qualifiers) const 855 { 856 unsigned xfb_offset = 0; 857 unsigned size = this->size; 858 /* Handle gl_SkipComponents. */ 859 if (this->skip_components) { 860 info->Buffers[buffer].Stride += this->skip_components; 861 size = this->skip_components; 862 goto store_varying; 863 } 864 865 if (this->next_buffer_separator) { 866 size = 0; 867 goto store_varying; 868 } 869 870 if (has_xfb_qualifiers) { 871 xfb_offset = this->offset / 4; 872 } else { 873 xfb_offset = info->Buffers[buffer].Stride; 874 } 875 info->Varyings[info->NumVarying].Offset = xfb_offset * 4; 876 877 { 878 unsigned location = this->location; 879 unsigned location_frac = this->location_frac; 880 unsigned num_components = this->num_components(); 881 while (num_components > 0) { 882 unsigned output_size = MIN2(num_components, 4 - location_frac); 883 assert((info->NumOutputs == 0 && max_outputs == 0) || 884 info->NumOutputs < max_outputs); 885 886 /* From the ARB_enhanced_layouts spec: 887 * 888 * "If such a block member or variable is not written during a shader 889 * invocation, the buffer contents at the assigned offset will be 890 * undefined. Even if there are no static writes to a variable or 891 * member that is assigned a transform feedback offset, the space is 892 * still allocated in the buffer and still affects the stride." 893 */ 894 if (this->is_varying_written()) { 895 info->Outputs[info->NumOutputs].ComponentOffset = location_frac; 896 info->Outputs[info->NumOutputs].OutputRegister = location; 897 info->Outputs[info->NumOutputs].NumComponents = output_size; 898 info->Outputs[info->NumOutputs].StreamId = stream_id; 899 info->Outputs[info->NumOutputs].OutputBuffer = buffer; 900 info->Outputs[info->NumOutputs].DstOffset = xfb_offset; 901 ++info->NumOutputs; 902 } 903 info->Buffers[buffer].Stream = this->stream_id; 904 xfb_offset += output_size; 905 906 num_components -= output_size; 907 location++; 908 location_frac = 0; 909 } 910 } 911 912 if (explicit_stride && explicit_stride[buffer]) { 913 if (this->is_64bit() && info->Buffers[buffer].Stride % 2) { 914 linker_error(prog, "invalid qualifier xfb_stride=%d must be a " 915 "multiple of 8 as its applied to a type that is or " 916 "contains a double.", 917 info->Buffers[buffer].Stride * 4); 918 return false; 919 } 920 921 if ((this->offset / 4) / info->Buffers[buffer].Stride != 922 (xfb_offset - 1) / info->Buffers[buffer].Stride) { 923 linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " 924 "buffer (%d)", xfb_offset * 4, 925 info->Buffers[buffer].Stride * 4, buffer); 926 return false; 927 } 928 } else { 929 info->Buffers[buffer].Stride = xfb_offset; 930 } 931 932 /* From GL_EXT_transform_feedback: 933 * A program will fail to link if: 934 * 935 * * the total number of components to capture is greater than 936 * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 937 * and the buffer mode is INTERLEAVED_ATTRIBS_EXT. 938 * 939 * From GL_ARB_enhanced_layouts: 940 * 941 * "The resulting stride (implicit or explicit) must be less than or 942 * equal to the implementation-dependent constant 943 * gl_MaxTransformFeedbackInterleavedComponents." 944 */ 945 if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || 946 has_xfb_qualifiers) && 947 info->Buffers[buffer].Stride > 948 ctx->Const.MaxTransformFeedbackInterleavedComponents) { 949 linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " 950 "limit has been exceeded."); 951 return false; 952 } 953 954 store_varying: 955 info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, 956 this->orig_name); 957 info->Varyings[info->NumVarying].Type = this->type; 958 info->Varyings[info->NumVarying].Size = size; 959 info->Varyings[info->NumVarying].BufferIndex = buffer_index; 960 info->NumVarying++; 961 info->Buffers[buffer].NumVaryings++; 962 963 return true; 964 } 965 966 967 const tfeedback_candidate * 968 tfeedback_decl::find_candidate(gl_shader_program *prog, 969 hash_table *tfeedback_candidates) 970 { 971 const char *name = this->var_name; 972 switch (this->lowered_builtin_array_variable) { 973 case none: 974 name = this->var_name; 975 break; 976 case clip_distance: 977 name = "gl_ClipDistanceMESA"; 978 break; 979 case cull_distance: 980 name = "gl_CullDistanceMESA"; 981 break; 982 case tess_level_outer: 983 name = "gl_TessLevelOuterMESA"; 984 break; 985 case tess_level_inner: 986 name = "gl_TessLevelInnerMESA"; 987 break; 988 } 989 hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name); 990 991 this->matched_candidate = entry ? 992 (const tfeedback_candidate *) entry->data : NULL; 993 994 if (!this->matched_candidate) { 995 /* From GL_EXT_transform_feedback: 996 * A program will fail to link if: 997 * 998 * * any variable name specified in the <varyings> array is not 999 * declared as an output in the geometry shader (if present) or 1000 * the vertex shader (if no geometry shader is present); 1001 */ 1002 linker_error(prog, "Transform feedback varying %s undeclared.", 1003 this->orig_name); 1004 } 1005 1006 return this->matched_candidate; 1007 } 1008 1009 1010 /** 1011 * Parse all the transform feedback declarations that were passed to 1012 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. 1013 * 1014 * If an error occurs, the error is reported through linker_error() and false 1015 * is returned. 1016 */ 1017 bool 1018 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, 1019 const void *mem_ctx, unsigned num_names, 1020 char **varying_names, tfeedback_decl *decls) 1021 { 1022 for (unsigned i = 0; i < num_names; ++i) { 1023 decls[i].init(ctx, mem_ctx, varying_names[i]); 1024 1025 if (!decls[i].is_varying()) 1026 continue; 1027 1028 /* From GL_EXT_transform_feedback: 1029 * A program will fail to link if: 1030 * 1031 * * any two entries in the <varyings> array specify the same varying 1032 * variable; 1033 * 1034 * We interpret this to mean "any two entries in the <varyings> array 1035 * specify the same varying variable and array index", since transform 1036 * feedback of arrays would be useless otherwise. 1037 */ 1038 for (unsigned j = 0; j < i; ++j) { 1039 if (!decls[j].is_varying()) 1040 continue; 1041 1042 if (tfeedback_decl::is_same(decls[i], decls[j])) { 1043 linker_error(prog, "Transform feedback varying %s specified " 1044 "more than once.", varying_names[i]); 1045 return false; 1046 } 1047 } 1048 } 1049 return true; 1050 } 1051 1052 1053 static int 1054 cmp_xfb_offset(const void * x_generic, const void * y_generic) 1055 { 1056 tfeedback_decl *x = (tfeedback_decl *) x_generic; 1057 tfeedback_decl *y = (tfeedback_decl *) y_generic; 1058 1059 if (x->get_buffer() != y->get_buffer()) 1060 return x->get_buffer() - y->get_buffer(); 1061 return x->get_offset() - y->get_offset(); 1062 } 1063 1064 /** 1065 * Store transform feedback location assignments into 1066 * prog->sh.LinkedTransformFeedback based on the data stored in 1067 * tfeedback_decls. 1068 * 1069 * If an error occurs, the error is reported through linker_error() and false 1070 * is returned. 1071 */ 1072 bool 1073 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, 1074 unsigned num_tfeedback_decls, 1075 tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers) 1076 { 1077 /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for 1078 * tracking the number of buffers doesn't overflow. 1079 */ 1080 assert(ctx->Const.MaxTransformFeedbackBuffers < 32); 1081 1082 bool separate_attribs_mode = 1083 prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; 1084 1085 struct gl_program *xfb_prog = prog->xfb_program; 1086 xfb_prog->sh.LinkedTransformFeedback = 1087 rzalloc(xfb_prog, struct gl_transform_feedback_info); 1088 1089 /* The xfb_offset qualifier does not have to be used in increasing order 1090 * however some drivers expect to receive the list of transform feedback 1091 * declarations in order so sort it now for convenience. 1092 */ 1093 if (has_xfb_qualifiers) 1094 qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), 1095 cmp_xfb_offset); 1096 1097 xfb_prog->sh.LinkedTransformFeedback->Varyings = 1098 rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, 1099 num_tfeedback_decls); 1100 1101 unsigned num_outputs = 0; 1102 for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1103 if (tfeedback_decls[i].is_varying_written()) 1104 num_outputs += tfeedback_decls[i].get_num_outputs(); 1105 } 1106 1107 xfb_prog->sh.LinkedTransformFeedback->Outputs = 1108 rzalloc_array(xfb_prog, struct gl_transform_feedback_output, 1109 num_outputs); 1110 1111 unsigned num_buffers = 0; 1112 unsigned buffers = 0; 1113 1114 if (!has_xfb_qualifiers && separate_attribs_mode) { 1115 /* GL_SEPARATE_ATTRIBS */ 1116 for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1117 if (!tfeedback_decls[i].store(ctx, prog, 1118 xfb_prog->sh.LinkedTransformFeedback, 1119 num_buffers, num_buffers, num_outputs, 1120 NULL, has_xfb_qualifiers)) 1121 return false; 1122 1123 buffers |= 1 << num_buffers; 1124 num_buffers++; 1125 } 1126 } 1127 else { 1128 /* GL_INVERLEAVED_ATTRIBS */ 1129 int buffer_stream_id = -1; 1130 unsigned buffer = 1131 num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0; 1132 bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false }; 1133 1134 /* Apply any xfb_stride global qualifiers */ 1135 if (has_xfb_qualifiers) { 1136 for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 1137 if (prog->TransformFeedback.BufferStride[j]) { 1138 buffers |= 1 << j; 1139 explicit_stride[j] = true; 1140 xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = 1141 prog->TransformFeedback.BufferStride[j] / 4; 1142 } 1143 } 1144 } 1145 1146 for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1147 if (has_xfb_qualifiers && 1148 buffer != tfeedback_decls[i].get_buffer()) { 1149 /* we have moved to the next buffer so reset stream id */ 1150 buffer_stream_id = -1; 1151 num_buffers++; 1152 } 1153 1154 if (tfeedback_decls[i].is_next_buffer_separator()) { 1155 if (!tfeedback_decls[i].store(ctx, prog, 1156 xfb_prog->sh.LinkedTransformFeedback, 1157 buffer, num_buffers, num_outputs, 1158 explicit_stride, has_xfb_qualifiers)) 1159 return false; 1160 num_buffers++; 1161 buffer_stream_id = -1; 1162 continue; 1163 } else if (tfeedback_decls[i].is_varying()) { 1164 if (buffer_stream_id == -1) { 1165 /* First varying writing to this buffer: remember its stream */ 1166 buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); 1167 } else if (buffer_stream_id != 1168 (int) tfeedback_decls[i].get_stream_id()) { 1169 /* Varying writes to the same buffer from a different stream */ 1170 linker_error(prog, 1171 "Transform feedback can't capture varyings belonging " 1172 "to different vertex streams in a single buffer. " 1173 "Varying %s writes to buffer from stream %u, other " 1174 "varyings in the same buffer write from stream %u.", 1175 tfeedback_decls[i].name(), 1176 tfeedback_decls[i].get_stream_id(), 1177 buffer_stream_id); 1178 return false; 1179 } 1180 } 1181 1182 if (has_xfb_qualifiers) { 1183 buffer = tfeedback_decls[i].get_buffer(); 1184 } else { 1185 buffer = num_buffers; 1186 } 1187 buffers |= 1 << buffer; 1188 1189 if (!tfeedback_decls[i].store(ctx, prog, 1190 xfb_prog->sh.LinkedTransformFeedback, 1191 buffer, num_buffers, num_outputs, 1192 explicit_stride, has_xfb_qualifiers)) 1193 return false; 1194 } 1195 } 1196 1197 assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs); 1198 1199 xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; 1200 return true; 1201 } 1202 1203 namespace { 1204 1205 /** 1206 * Data structure recording the relationship between outputs of one shader 1207 * stage (the "producer") and inputs of another (the "consumer"). 1208 */ 1209 class varying_matches 1210 { 1211 public: 1212 varying_matches(bool disable_varying_packing, bool xfb_enabled, 1213 gl_shader_stage producer_stage, 1214 gl_shader_stage consumer_stage); 1215 ~varying_matches(); 1216 void record(ir_variable *producer_var, ir_variable *consumer_var); 1217 unsigned assign_locations(struct gl_shader_program *prog, 1218 uint8_t *components, 1219 uint64_t reserved_slots); 1220 void store_locations() const; 1221 1222 private: 1223 bool is_varying_packing_safe(const glsl_type *type, 1224 const ir_variable *var); 1225 1226 /** 1227 * If true, this driver disables varying packing, so all varyings need to 1228 * be aligned on slot boundaries, and take up a number of slots equal to 1229 * their number of matrix columns times their array size. 1230 * 1231 * Packing may also be disabled because our current packing method is not 1232 * safe in SSO or versions of OpenGL where interpolation qualifiers are not 1233 * guaranteed to match across stages. 1234 */ 1235 const bool disable_varying_packing; 1236 1237 /** 1238 * If true, this driver has transform feedback enabled. The transform 1239 * feedback code requires at least some packing be done even when varying 1240 * packing is disabled, fortunately where transform feedback requires 1241 * packing it's safe to override the disabled setting. See 1242 * is_varying_packing_safe(). 1243 */ 1244 const bool xfb_enabled; 1245 1246 /** 1247 * Enum representing the order in which varyings are packed within a 1248 * packing class. 1249 * 1250 * Currently we pack vec4's first, then vec2's, then scalar values, then 1251 * vec3's. This order ensures that the only vectors that are at risk of 1252 * having to be "double parked" (split between two adjacent varying slots) 1253 * are the vec3's. 1254 */ 1255 enum packing_order_enum { 1256 PACKING_ORDER_VEC4, 1257 PACKING_ORDER_VEC2, 1258 PACKING_ORDER_SCALAR, 1259 PACKING_ORDER_VEC3, 1260 }; 1261 1262 static unsigned compute_packing_class(const ir_variable *var); 1263 static packing_order_enum compute_packing_order(const ir_variable *var); 1264 static int match_comparator(const void *x_generic, const void *y_generic); 1265 static int xfb_comparator(const void *x_generic, const void *y_generic); 1266 1267 /** 1268 * Structure recording the relationship between a single producer output 1269 * and a single consumer input. 1270 */ 1271 struct match { 1272 /** 1273 * Packing class for this varying, computed by compute_packing_class(). 1274 */ 1275 unsigned packing_class; 1276 1277 /** 1278 * Packing order for this varying, computed by compute_packing_order(). 1279 */ 1280 packing_order_enum packing_order; 1281 unsigned num_components; 1282 1283 /** 1284 * The output variable in the producer stage. 1285 */ 1286 ir_variable *producer_var; 1287 1288 /** 1289 * The input variable in the consumer stage. 1290 */ 1291 ir_variable *consumer_var; 1292 1293 /** 1294 * The location which has been assigned for this varying. This is 1295 * expressed in multiples of a float, with the first generic varying 1296 * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the 1297 * value 0. 1298 */ 1299 unsigned generic_location; 1300 } *matches; 1301 1302 /** 1303 * The number of elements in the \c matches array that are currently in 1304 * use. 1305 */ 1306 unsigned num_matches; 1307 1308 /** 1309 * The number of elements that were set aside for the \c matches array when 1310 * it was allocated. 1311 */ 1312 unsigned matches_capacity; 1313 1314 gl_shader_stage producer_stage; 1315 gl_shader_stage consumer_stage; 1316 }; 1317 1318 } /* anonymous namespace */ 1319 1320 varying_matches::varying_matches(bool disable_varying_packing, 1321 bool xfb_enabled, 1322 gl_shader_stage producer_stage, 1323 gl_shader_stage consumer_stage) 1324 : disable_varying_packing(disable_varying_packing), 1325 xfb_enabled(xfb_enabled), 1326 producer_stage(producer_stage), 1327 consumer_stage(consumer_stage) 1328 { 1329 /* Note: this initial capacity is rather arbitrarily chosen to be large 1330 * enough for many cases without wasting an unreasonable amount of space. 1331 * varying_matches::record() will resize the array if there are more than 1332 * this number of varyings. 1333 */ 1334 this->matches_capacity = 8; 1335 this->matches = (match *) 1336 malloc(sizeof(*this->matches) * this->matches_capacity); 1337 this->num_matches = 0; 1338 } 1339 1340 1341 varying_matches::~varying_matches() 1342 { 1343 free(this->matches); 1344 } 1345 1346 1347 /** 1348 * Packing is always safe on individual arrays, structures, and matrices. It 1349 * is also safe if the varying is only used for transform feedback. 1350 */ 1351 bool 1352 varying_matches::is_varying_packing_safe(const glsl_type *type, 1353 const ir_variable *var) 1354 { 1355 if (consumer_stage == MESA_SHADER_TESS_EVAL || 1356 consumer_stage == MESA_SHADER_TESS_CTRL || 1357 producer_stage == MESA_SHADER_TESS_CTRL) 1358 return false; 1359 1360 return xfb_enabled && (type->is_array() || type->is_record() || 1361 type->is_matrix() || var->data.is_xfb_only); 1362 } 1363 1364 1365 /** 1366 * Record the given producer/consumer variable pair in the list of variables 1367 * that should later be assigned locations. 1368 * 1369 * It is permissible for \c consumer_var to be NULL (this happens if a 1370 * variable is output by the producer and consumed by transform feedback, but 1371 * not consumed by the consumer). 1372 * 1373 * If \c producer_var has already been paired up with a consumer_var, or 1374 * producer_var is part of fixed pipeline functionality (and hence already has 1375 * a location assigned), this function has no effect. 1376 * 1377 * Note: as a side effect this function may change the interpolation type of 1378 * \c producer_var, but only when the change couldn't possibly affect 1379 * rendering. 1380 */ 1381 void 1382 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) 1383 { 1384 assert(producer_var != NULL || consumer_var != NULL); 1385 1386 if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || 1387 producer_var->data.explicit_location)) || 1388 (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || 1389 consumer_var->data.explicit_location))) { 1390 /* Either a location already exists for this variable (since it is part 1391 * of fixed functionality), or it has already been recorded as part of a 1392 * previous match. 1393 */ 1394 return; 1395 } 1396 1397 bool needs_flat_qualifier = consumer_var == NULL && 1398 (producer_var->type->contains_integer() || 1399 producer_var->type->contains_double()); 1400 1401 if (!disable_varying_packing && 1402 (needs_flat_qualifier || 1403 (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT))) { 1404 /* Since this varying is not being consumed by the fragment shader, its 1405 * interpolation type varying cannot possibly affect rendering. 1406 * Also, this variable is non-flat and is (or contains) an integer 1407 * or a double. 1408 * If the consumer stage is unknown, don't modify the interpolation 1409 * type as it could affect rendering later with separate shaders. 1410 * 1411 * lower_packed_varyings requires all integer varyings to flat, 1412 * regardless of where they appear. We can trivially satisfy that 1413 * requirement by changing the interpolation type to flat here. 1414 */ 1415 if (producer_var) { 1416 producer_var->data.centroid = false; 1417 producer_var->data.sample = false; 1418 producer_var->data.interpolation = INTERP_MODE_FLAT; 1419 } 1420 1421 if (consumer_var) { 1422 consumer_var->data.centroid = false; 1423 consumer_var->data.sample = false; 1424 consumer_var->data.interpolation = INTERP_MODE_FLAT; 1425 } 1426 } 1427 1428 if (this->num_matches == this->matches_capacity) { 1429 this->matches_capacity *= 2; 1430 this->matches = (match *) 1431 realloc(this->matches, 1432 sizeof(*this->matches) * this->matches_capacity); 1433 } 1434 1435 /* We must use the consumer to compute the packing class because in GL4.4+ 1436 * there is no guarantee interpolation qualifiers will match across stages. 1437 * 1438 * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: 1439 * 1440 * "The type and presence of interpolation qualifiers of variables with 1441 * the same name declared in all linked shaders for the same cross-stage 1442 * interface must match, otherwise the link command will fail. 1443 * 1444 * When comparing an output from one stage to an input of a subsequent 1445 * stage, the input and output don't match if their interpolation 1446 * qualifiers (or lack thereof) are not the same." 1447 * 1448 * This text was also in at least revison 7 of the 4.40 spec but is no 1449 * longer in revision 9 and not in the 4.50 spec. 1450 */ 1451 const ir_variable *const var = (consumer_var != NULL) 1452 ? consumer_var : producer_var; 1453 const gl_shader_stage stage = (consumer_var != NULL) 1454 ? consumer_stage : producer_stage; 1455 const glsl_type *type = get_varying_type(var, stage); 1456 1457 this->matches[this->num_matches].packing_class 1458 = this->compute_packing_class(var); 1459 this->matches[this->num_matches].packing_order 1460 = this->compute_packing_order(var); 1461 if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) { 1462 unsigned slots = type->count_attribute_slots(false); 1463 this->matches[this->num_matches].num_components = slots * 4; 1464 } else { 1465 this->matches[this->num_matches].num_components 1466 = type->component_slots(); 1467 } 1468 this->matches[this->num_matches].producer_var = producer_var; 1469 this->matches[this->num_matches].consumer_var = consumer_var; 1470 this->num_matches++; 1471 if (producer_var) 1472 producer_var->data.is_unmatched_generic_inout = 0; 1473 if (consumer_var) 1474 consumer_var->data.is_unmatched_generic_inout = 0; 1475 } 1476 1477 1478 /** 1479 * Choose locations for all of the variable matches that were previously 1480 * passed to varying_matches::record(). 1481 */ 1482 unsigned 1483 varying_matches::assign_locations(struct gl_shader_program *prog, 1484 uint8_t *components, 1485 uint64_t reserved_slots) 1486 { 1487 /* If packing has been disabled then we cannot safely sort the varyings by 1488 * class as it may mean we are using a version of OpenGL where 1489 * interpolation qualifiers are not guaranteed to be matching across 1490 * shaders, sorting in this case could result in mismatching shader 1491 * interfaces. 1492 * When packing is disabled the sort orders varyings used by transform 1493 * feedback first, but also depends on *undefined behaviour* of qsort to 1494 * reverse the order of the varyings. See: xfb_comparator(). 1495 */ 1496 if (!this->disable_varying_packing) { 1497 /* Sort varying matches into an order that makes them easy to pack. */ 1498 qsort(this->matches, this->num_matches, sizeof(*this->matches), 1499 &varying_matches::match_comparator); 1500 } else { 1501 /* Only sort varyings that are only used by transform feedback. */ 1502 qsort(this->matches, this->num_matches, sizeof(*this->matches), 1503 &varying_matches::xfb_comparator); 1504 } 1505 1506 unsigned generic_location = 0; 1507 unsigned generic_patch_location = MAX_VARYING*4; 1508 bool previous_var_xfb_only = false; 1509 1510 for (unsigned i = 0; i < this->num_matches; i++) { 1511 unsigned *location = &generic_location; 1512 1513 const ir_variable *var; 1514 const glsl_type *type; 1515 bool is_vertex_input = false; 1516 if (matches[i].consumer_var) { 1517 var = matches[i].consumer_var; 1518 type = get_varying_type(var, consumer_stage); 1519 if (consumer_stage == MESA_SHADER_VERTEX) 1520 is_vertex_input = true; 1521 } else { 1522 var = matches[i].producer_var; 1523 type = get_varying_type(var, producer_stage); 1524 } 1525 1526 if (var->data.patch) 1527 location = &generic_patch_location; 1528 1529 /* Advance to the next slot if this varying has a different packing 1530 * class than the previous one, and we're not already on a slot 1531 * boundary. 1532 * 1533 * Also advance to the next slot if packing is disabled. This makes sure 1534 * we don't assign varyings the same locations which is possible 1535 * because we still pack individual arrays, records and matrices even 1536 * when packing is disabled. Note we don't advance to the next slot if 1537 * we can pack varyings together that are only used for transform 1538 * feedback. 1539 */ 1540 if ((this->disable_varying_packing && 1541 !(previous_var_xfb_only && var->data.is_xfb_only)) || 1542 (i > 0 && this->matches[i - 1].packing_class 1543 != this->matches[i].packing_class )) { 1544 *location = ALIGN(*location, 4); 1545 } 1546 1547 previous_var_xfb_only = var->data.is_xfb_only; 1548 1549 /* The number of components taken up by this variable. For vertex shader 1550 * inputs, we use the number of slots * 4, as they have different 1551 * counting rules. 1552 */ 1553 unsigned num_components = is_vertex_input ? 1554 type->count_attribute_slots(is_vertex_input) * 4 : 1555 this->matches[i].num_components; 1556 1557 /* The last slot for this variable, inclusive. */ 1558 unsigned slot_end = *location + num_components - 1; 1559 1560 /* FIXME: We could be smarter in the below code and loop back over 1561 * trying to fill any locations that we skipped because we couldn't pack 1562 * the varying between an explicit location. For now just let the user 1563 * hit the linking error if we run out of room and suggest they use 1564 * explicit locations. 1565 */ 1566 while (slot_end < MAX_VARYING * 4u) { 1567 const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; 1568 const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); 1569 1570 assert(slots > 0); 1571 if (reserved_slots & slot_mask) { 1572 *location = ALIGN(*location + 1, 4); 1573 slot_end = *location + num_components - 1; 1574 continue; 1575 } 1576 1577 break; 1578 } 1579 1580 if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { 1581 linker_error(prog, "insufficient contiguous locations available for " 1582 "%s it is possible an array or struct could not be " 1583 "packed between varyings with explicit locations. Try " 1584 "using an explicit location for arrays and structs.", 1585 var->name); 1586 } 1587 1588 if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { 1589 for (unsigned j = *location / 4u; j < slot_end / 4u; j++) 1590 components[j] = 4; 1591 components[slot_end / 4u] = (slot_end & 3) + 1; 1592 } 1593 1594 this->matches[i].generic_location = *location; 1595 1596 *location = slot_end + 1; 1597 } 1598 1599 return (generic_location + 3) / 4; 1600 } 1601 1602 1603 /** 1604 * Update the producer and consumer shaders to reflect the locations 1605 * assignments that were made by varying_matches::assign_locations(). 1606 */ 1607 void 1608 varying_matches::store_locations() const 1609 { 1610 for (unsigned i = 0; i < this->num_matches; i++) { 1611 ir_variable *producer_var = this->matches[i].producer_var; 1612 ir_variable *consumer_var = this->matches[i].consumer_var; 1613 unsigned generic_location = this->matches[i].generic_location; 1614 unsigned slot = generic_location / 4; 1615 unsigned offset = generic_location % 4; 1616 1617 if (producer_var) { 1618 producer_var->data.location = VARYING_SLOT_VAR0 + slot; 1619 producer_var->data.location_frac = offset; 1620 } 1621 1622 if (consumer_var) { 1623 assert(consumer_var->data.location == -1); 1624 consumer_var->data.location = VARYING_SLOT_VAR0 + slot; 1625 consumer_var->data.location_frac = offset; 1626 } 1627 } 1628 } 1629 1630 1631 /** 1632 * Compute the "packing class" of the given varying. This is an unsigned 1633 * integer with the property that two variables in the same packing class can 1634 * be safely backed into the same vec4. 1635 */ 1636 unsigned 1637 varying_matches::compute_packing_class(const ir_variable *var) 1638 { 1639 /* Without help from the back-end, there is no way to pack together 1640 * variables with different interpolation types, because 1641 * lower_packed_varyings must choose exactly one interpolation type for 1642 * each packed varying it creates. 1643 * 1644 * However, we can safely pack together floats, ints, and uints, because: 1645 * 1646 * - varyings of base type "int" and "uint" must use the "flat" 1647 * interpolation type, which can only occur in GLSL 1.30 and above. 1648 * 1649 * - On platforms that support GLSL 1.30 and above, lower_packed_varyings 1650 * can store flat floats as ints without losing any information (using 1651 * the ir_unop_bitcast_* opcodes). 1652 * 1653 * Therefore, the packing class depends only on the interpolation type. 1654 */ 1655 unsigned packing_class = var->data.centroid | (var->data.sample << 1) | 1656 (var->data.patch << 2); 1657 packing_class *= 4; 1658 packing_class += var->is_interpolation_flat() 1659 ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation; 1660 return packing_class; 1661 } 1662 1663 1664 /** 1665 * Compute the "packing order" of the given varying. This is a sort key we 1666 * use to determine when to attempt to pack the given varying relative to 1667 * other varyings in the same packing class. 1668 */ 1669 varying_matches::packing_order_enum 1670 varying_matches::compute_packing_order(const ir_variable *var) 1671 { 1672 const glsl_type *element_type = var->type; 1673 1674 while (element_type->base_type == GLSL_TYPE_ARRAY) { 1675 element_type = element_type->fields.array; 1676 } 1677 1678 switch (element_type->component_slots() % 4) { 1679 case 1: return PACKING_ORDER_SCALAR; 1680 case 2: return PACKING_ORDER_VEC2; 1681 case 3: return PACKING_ORDER_VEC3; 1682 case 0: return PACKING_ORDER_VEC4; 1683 default: 1684 assert(!"Unexpected value of vector_elements"); 1685 return PACKING_ORDER_VEC4; 1686 } 1687 } 1688 1689 1690 /** 1691 * Comparison function passed to qsort() to sort varyings by packing_class and 1692 * then by packing_order. 1693 */ 1694 int 1695 varying_matches::match_comparator(const void *x_generic, const void *y_generic) 1696 { 1697 const match *x = (const match *) x_generic; 1698 const match *y = (const match *) y_generic; 1699 1700 if (x->packing_class != y->packing_class) 1701 return x->packing_class - y->packing_class; 1702 return x->packing_order - y->packing_order; 1703 } 1704 1705 1706 /** 1707 * Comparison function passed to qsort() to sort varyings used only by 1708 * transform feedback when packing of other varyings is disabled. 1709 */ 1710 int 1711 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic) 1712 { 1713 const match *x = (const match *) x_generic; 1714 1715 if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) 1716 return match_comparator(x_generic, y_generic); 1717 1718 /* FIXME: When the comparator returns 0 it means the elements being 1719 * compared are equivalent. However the qsort documentation says: 1720 * 1721 * "The order of equivalent elements is undefined." 1722 * 1723 * In practice the sort ends up reversing the order of the varyings which 1724 * means locations are also assigned in this reversed order and happens to 1725 * be what we want. This is also whats happening in 1726 * varying_matches::match_comparator(). 1727 */ 1728 return 0; 1729 } 1730 1731 1732 /** 1733 * Is the given variable a varying variable to be counted against the 1734 * limit in ctx->Const.MaxVarying? 1735 * This includes variables such as texcoords, colors and generic 1736 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. 1737 */ 1738 static bool 1739 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) 1740 { 1741 /* Only fragment shaders will take a varying variable as an input */ 1742 if (stage == MESA_SHADER_FRAGMENT && 1743 var->data.mode == ir_var_shader_in) { 1744 switch (var->data.location) { 1745 case VARYING_SLOT_POS: 1746 case VARYING_SLOT_FACE: 1747 case VARYING_SLOT_PNTC: 1748 return false; 1749 default: 1750 return true; 1751 } 1752 } 1753 return false; 1754 } 1755 1756 1757 /** 1758 * Visitor class that generates tfeedback_candidate structs describing all 1759 * possible targets of transform feedback. 1760 * 1761 * tfeedback_candidate structs are stored in the hash table 1762 * tfeedback_candidates, which is passed to the constructor. This hash table 1763 * maps varying names to instances of the tfeedback_candidate struct. 1764 */ 1765 class tfeedback_candidate_generator : public program_resource_visitor 1766 { 1767 public: 1768 tfeedback_candidate_generator(void *mem_ctx, 1769 hash_table *tfeedback_candidates) 1770 : mem_ctx(mem_ctx), 1771 tfeedback_candidates(tfeedback_candidates), 1772 toplevel_var(NULL), 1773 varying_floats(0) 1774 { 1775 } 1776 1777 void process(ir_variable *var) 1778 { 1779 /* All named varying interface blocks should be flattened by now */ 1780 assert(!var->is_interface_instance()); 1781 1782 this->toplevel_var = var; 1783 this->varying_floats = 0; 1784 program_resource_visitor::process(var); 1785 } 1786 1787 private: 1788 virtual void visit_field(const glsl_type *type, const char *name, 1789 bool /* row_major */, 1790 const glsl_type * /* record_type */, 1791 const enum glsl_interface_packing, 1792 bool /* last_field */) 1793 { 1794 assert(!type->without_array()->is_record()); 1795 assert(!type->without_array()->is_interface()); 1796 1797 tfeedback_candidate *candidate 1798 = rzalloc(this->mem_ctx, tfeedback_candidate); 1799 candidate->toplevel_var = this->toplevel_var; 1800 candidate->type = type; 1801 candidate->offset = this->varying_floats; 1802 _mesa_hash_table_insert(this->tfeedback_candidates, 1803 ralloc_strdup(this->mem_ctx, name), 1804 candidate); 1805 this->varying_floats += type->component_slots(); 1806 } 1807 1808 /** 1809 * Memory context used to allocate hash table keys and values. 1810 */ 1811 void * const mem_ctx; 1812 1813 /** 1814 * Hash table in which tfeedback_candidate objects should be stored. 1815 */ 1816 hash_table * const tfeedback_candidates; 1817 1818 /** 1819 * Pointer to the toplevel variable that is being traversed. 1820 */ 1821 ir_variable *toplevel_var; 1822 1823 /** 1824 * Total number of varying floats that have been visited so far. This is 1825 * used to determine the offset to each varying within the toplevel 1826 * variable. 1827 */ 1828 unsigned varying_floats; 1829 }; 1830 1831 1832 namespace linker { 1833 1834 void 1835 populate_consumer_input_sets(void *mem_ctx, exec_list *ir, 1836 hash_table *consumer_inputs, 1837 hash_table *consumer_interface_inputs, 1838 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 1839 { 1840 memset(consumer_inputs_with_locations, 1841 0, 1842 sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); 1843 1844 foreach_in_list(ir_instruction, node, ir) { 1845 ir_variable *const input_var = node->as_variable(); 1846 1847 if (input_var != NULL && input_var->data.mode == ir_var_shader_in) { 1848 /* All interface blocks should have been lowered by this point */ 1849 assert(!input_var->type->is_interface()); 1850 1851 if (input_var->data.explicit_location) { 1852 /* assign_varying_locations only cares about finding the 1853 * ir_variable at the start of a contiguous location block. 1854 * 1855 * - For !producer, consumer_inputs_with_locations isn't used. 1856 * 1857 * - For !consumer, consumer_inputs_with_locations is empty. 1858 * 1859 * For consumer && producer, if you were trying to set some 1860 * ir_variable to the middle of a location block on the other side 1861 * of producer/consumer, cross_validate_outputs_to_inputs() should 1862 * be link-erroring due to either type mismatch or location 1863 * overlaps. If the variables do match up, then they've got a 1864 * matching data.location and you only looked at 1865 * consumer_inputs_with_locations[var->data.location], not any 1866 * following entries for the array/structure. 1867 */ 1868 consumer_inputs_with_locations[input_var->data.location] = 1869 input_var; 1870 } else if (input_var->get_interface_type() != NULL) { 1871 char *const iface_field_name = 1872 ralloc_asprintf(mem_ctx, "%s.%s", 1873 input_var->get_interface_type()->without_array()->name, 1874 input_var->name); 1875 _mesa_hash_table_insert(consumer_interface_inputs, 1876 iface_field_name, input_var); 1877 } else { 1878 _mesa_hash_table_insert(consumer_inputs, 1879 ralloc_strdup(mem_ctx, input_var->name), 1880 input_var); 1881 } 1882 } 1883 } 1884 } 1885 1886 /** 1887 * Find a variable from the consumer that "matches" the specified variable 1888 * 1889 * This function only finds inputs with names that match. There is no 1890 * validation (here) that the types, etc. are compatible. 1891 */ 1892 ir_variable * 1893 get_matching_input(void *mem_ctx, 1894 const ir_variable *output_var, 1895 hash_table *consumer_inputs, 1896 hash_table *consumer_interface_inputs, 1897 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 1898 { 1899 ir_variable *input_var; 1900 1901 if (output_var->data.explicit_location) { 1902 input_var = consumer_inputs_with_locations[output_var->data.location]; 1903 } else if (output_var->get_interface_type() != NULL) { 1904 char *const iface_field_name = 1905 ralloc_asprintf(mem_ctx, "%s.%s", 1906 output_var->get_interface_type()->without_array()->name, 1907 output_var->name); 1908 hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); 1909 input_var = entry ? (ir_variable *) entry->data : NULL; 1910 } else { 1911 hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name); 1912 input_var = entry ? (ir_variable *) entry->data : NULL; 1913 } 1914 1915 return (input_var == NULL || input_var->data.mode != ir_var_shader_in) 1916 ? NULL : input_var; 1917 } 1918 1919 } 1920 1921 static int 1922 io_variable_cmp(const void *_a, const void *_b) 1923 { 1924 const ir_variable *const a = *(const ir_variable **) _a; 1925 const ir_variable *const b = *(const ir_variable **) _b; 1926 1927 if (a->data.explicit_location && b->data.explicit_location) 1928 return b->data.location - a->data.location; 1929 1930 if (a->data.explicit_location && !b->data.explicit_location) 1931 return 1; 1932 1933 if (!a->data.explicit_location && b->data.explicit_location) 1934 return -1; 1935 1936 return -strcmp(a->name, b->name); 1937 } 1938 1939 /** 1940 * Sort the shader IO variables into canonical order 1941 */ 1942 static void 1943 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) 1944 { 1945 ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4]; 1946 unsigned num_variables = 0; 1947 1948 foreach_in_list(ir_instruction, node, ir) { 1949 ir_variable *const var = node->as_variable(); 1950 1951 if (var == NULL || var->data.mode != io_mode) 1952 continue; 1953 1954 /* If we have already encountered more I/O variables that could 1955 * successfully link, bail. 1956 */ 1957 if (num_variables == ARRAY_SIZE(var_table)) 1958 return; 1959 1960 var_table[num_variables++] = var; 1961 } 1962 1963 if (num_variables == 0) 1964 return; 1965 1966 /* Sort the list in reverse order (io_variable_cmp handles this). Later 1967 * we're going to push the variables on to the IR list as a stack, so we 1968 * want the last variable (in canonical order) to be first in the list. 1969 */ 1970 qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); 1971 1972 /* Remove the variable from it's current location in the IR, and put it at 1973 * the front. 1974 */ 1975 for (unsigned i = 0; i < num_variables; i++) { 1976 var_table[i]->remove(); 1977 ir->push_head(var_table[i]); 1978 } 1979 } 1980 1981 /** 1982 * Generate a bitfield map of the explicit locations for shader varyings. 1983 * 1984 * Note: For Tessellation shaders we are sitting right on the limits of the 1985 * 64 bit map. Per-vertex and per-patch both have separate location domains 1986 * with a max of MAX_VARYING. 1987 */ 1988 uint64_t 1989 reserved_varying_slot(struct gl_linked_shader *stage, 1990 ir_variable_mode io_mode) 1991 { 1992 assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); 1993 /* Avoid an overflow of the returned value */ 1994 assert(MAX_VARYINGS_INCL_PATCH <= 64); 1995 1996 uint64_t slots = 0; 1997 int var_slot; 1998 1999 if (!stage) 2000 return slots; 2001 2002 foreach_in_list(ir_instruction, node, stage->ir) { 2003 ir_variable *const var = node->as_variable(); 2004 2005 if (var == NULL || var->data.mode != io_mode || 2006 !var->data.explicit_location || 2007 var->data.location < VARYING_SLOT_VAR0) 2008 continue; 2009 2010 var_slot = var->data.location - VARYING_SLOT_VAR0; 2011 2012 unsigned num_elements = get_varying_type(var, stage->Stage) 2013 ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX); 2014 for (unsigned i = 0; i < num_elements; i++) { 2015 if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) 2016 slots |= UINT64_C(1) << var_slot; 2017 var_slot += 1; 2018 } 2019 } 2020 2021 return slots; 2022 } 2023 2024 2025 /** 2026 * Assign locations for all variables that are produced in one pipeline stage 2027 * (the "producer") and consumed in the next stage (the "consumer"). 2028 * 2029 * Variables produced by the producer may also be consumed by transform 2030 * feedback. 2031 * 2032 * \param num_tfeedback_decls is the number of declarations indicating 2033 * variables that may be consumed by transform feedback. 2034 * 2035 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects 2036 * representing the result of parsing the strings passed to 2037 * glTransformFeedbackVaryings(). assign_location() will be called for 2038 * each of these objects that matches one of the outputs of the 2039 * producer. 2040 * 2041 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to 2042 * be NULL. In this case, varying locations are assigned solely based on the 2043 * requirements of transform feedback. 2044 */ 2045 bool 2046 assign_varying_locations(struct gl_context *ctx, 2047 void *mem_ctx, 2048 struct gl_shader_program *prog, 2049 gl_linked_shader *producer, 2050 gl_linked_shader *consumer, 2051 unsigned num_tfeedback_decls, 2052 tfeedback_decl *tfeedback_decls, 2053 const uint64_t reserved_slots) 2054 { 2055 /* Tessellation shaders treat inputs and outputs as shared memory and can 2056 * access inputs and outputs of other invocations. 2057 * Therefore, they can't be lowered to temps easily (and definitely not 2058 * efficiently). 2059 */ 2060 bool unpackable_tess = 2061 (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || 2062 (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || 2063 (producer && producer->Stage == MESA_SHADER_TESS_CTRL); 2064 2065 /* Transform feedback code assumes varying arrays are packed, so if the 2066 * driver has disabled varying packing, make sure to at least enable 2067 * packing required by transform feedback. 2068 */ 2069 bool xfb_enabled = 2070 ctx->Extensions.EXT_transform_feedback && !unpackable_tess; 2071 2072 /* Disable packing on outward facing interfaces for SSO because in ES we 2073 * need to retain the unpacked varying information for draw time 2074 * validation. 2075 * 2076 * Packing is still enabled on individual arrays, structs, and matrices as 2077 * these are required by the transform feedback code and it is still safe 2078 * to do so. We also enable packing when a varying is only used for 2079 * transform feedback and its not a SSO. 2080 */ 2081 bool disable_varying_packing = 2082 ctx->Const.DisableVaryingPacking || unpackable_tess; 2083 if (prog->SeparateShader && (producer == NULL || consumer == NULL)) 2084 disable_varying_packing = true; 2085 2086 varying_matches matches(disable_varying_packing, xfb_enabled, 2087 producer ? producer->Stage : (gl_shader_stage)-1, 2088 consumer ? consumer->Stage : (gl_shader_stage)-1); 2089 hash_table *tfeedback_candidates = 2090 _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2091 _mesa_key_string_equal); 2092 hash_table *consumer_inputs = 2093 _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2094 _mesa_key_string_equal); 2095 hash_table *consumer_interface_inputs = 2096 _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2097 _mesa_key_string_equal); 2098 ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { 2099 NULL, 2100 }; 2101 2102 unsigned consumer_vertices = 0; 2103 if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) 2104 consumer_vertices = prog->Geom.VerticesIn; 2105 2106 /* Operate in a total of four passes. 2107 * 2108 * 1. Sort inputs / outputs into a canonical order. This is necessary so 2109 * that inputs / outputs of separable shaders will be assigned 2110 * predictable locations regardless of the order in which declarations 2111 * appeared in the shader source. 2112 * 2113 * 2. Assign locations for any matching inputs and outputs. 2114 * 2115 * 3. Mark output variables in the producer that do not have locations as 2116 * not being outputs. This lets the optimizer eliminate them. 2117 * 2118 * 4. Mark input variables in the consumer that do not have locations as 2119 * not being inputs. This lets the optimizer eliminate them. 2120 */ 2121 if (consumer) 2122 canonicalize_shader_io(consumer->ir, ir_var_shader_in); 2123 2124 if (producer) 2125 canonicalize_shader_io(producer->ir, ir_var_shader_out); 2126 2127 if (consumer) 2128 linker::populate_consumer_input_sets(mem_ctx, consumer->ir, 2129 consumer_inputs, 2130 consumer_interface_inputs, 2131 consumer_inputs_with_locations); 2132 2133 if (producer) { 2134 foreach_in_list(ir_instruction, node, producer->ir) { 2135 ir_variable *const output_var = node->as_variable(); 2136 2137 if (output_var == NULL || output_var->data.mode != ir_var_shader_out) 2138 continue; 2139 2140 /* Only geometry shaders can use non-zero streams */ 2141 assert(output_var->data.stream == 0 || 2142 (output_var->data.stream < MAX_VERTEX_STREAMS && 2143 producer->Stage == MESA_SHADER_GEOMETRY)); 2144 2145 if (num_tfeedback_decls > 0) { 2146 tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates); 2147 g.process(output_var); 2148 } 2149 2150 ir_variable *const input_var = 2151 linker::get_matching_input(mem_ctx, output_var, consumer_inputs, 2152 consumer_interface_inputs, 2153 consumer_inputs_with_locations); 2154 2155 /* If a matching input variable was found, add this output (and the 2156 * input) to the set. If this is a separable program and there is no 2157 * consumer stage, add the output. 2158 * 2159 * Always add TCS outputs. They are shared by all invocations 2160 * within a patch and can be used as shared memory. 2161 */ 2162 if (input_var || (prog->SeparateShader && consumer == NULL) || 2163 producer->Stage == MESA_SHADER_TESS_CTRL) { 2164 matches.record(output_var, input_var); 2165 } 2166 2167 /* Only stream 0 outputs can be consumed in the next stage */ 2168 if (input_var && output_var->data.stream != 0) { 2169 linker_error(prog, "output %s is assigned to stream=%d but " 2170 "is linked to an input, which requires stream=0", 2171 output_var->name, output_var->data.stream); 2172 return false; 2173 } 2174 } 2175 } else { 2176 /* If there's no producer stage, then this must be a separable program. 2177 * For example, we may have a program that has just a fragment shader. 2178 * Later this program will be used with some arbitrary vertex (or 2179 * geometry) shader program. This means that locations must be assigned 2180 * for all the inputs. 2181 */ 2182 foreach_in_list(ir_instruction, node, consumer->ir) { 2183 ir_variable *const input_var = node->as_variable(); 2184 2185 if (input_var == NULL || input_var->data.mode != ir_var_shader_in) 2186 continue; 2187 2188 matches.record(NULL, input_var); 2189 } 2190 } 2191 2192 _mesa_hash_table_destroy(consumer_inputs, NULL); 2193 _mesa_hash_table_destroy(consumer_interface_inputs, NULL); 2194 2195 for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 2196 if (!tfeedback_decls[i].is_varying()) 2197 continue; 2198 2199 const tfeedback_candidate *matched_candidate 2200 = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); 2201 2202 if (matched_candidate == NULL) { 2203 _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2204 return false; 2205 } 2206 2207 if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { 2208 matched_candidate->toplevel_var->data.is_xfb_only = 1; 2209 matches.record(matched_candidate->toplevel_var, NULL); 2210 } 2211 } 2212 2213 uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; 2214 const unsigned slots_used = matches.assign_locations( 2215 prog, components, reserved_slots); 2216 matches.store_locations(); 2217 2218 for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 2219 if (!tfeedback_decls[i].is_varying()) 2220 continue; 2221 2222 if (!tfeedback_decls[i].assign_location(ctx, prog)) { 2223 _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2224 return false; 2225 } 2226 } 2227 _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2228 2229 if (consumer && producer) { 2230 foreach_in_list(ir_instruction, node, consumer->ir) { 2231 ir_variable *const var = node->as_variable(); 2232 2233 if (var && var->data.mode == ir_var_shader_in && 2234 var->data.is_unmatched_generic_inout) { 2235 if (!prog->IsES && prog->data->Version <= 120) { 2236 /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: 2237 * 2238 * Only those varying variables used (i.e. read) in 2239 * the fragment shader executable must be written to 2240 * by the vertex shader executable; declaring 2241 * superfluous varying variables in a vertex shader is 2242 * permissible. 2243 * 2244 * We interpret this text as meaning that the VS must 2245 * write the variable for the FS to read it. See 2246 * "glsl1-varying read but not written" in piglit. 2247 */ 2248 linker_error(prog, "%s shader varying %s not written " 2249 "by %s shader\n.", 2250 _mesa_shader_stage_to_string(consumer->Stage), 2251 var->name, 2252 _mesa_shader_stage_to_string(producer->Stage)); 2253 } else { 2254 linker_warning(prog, "%s shader varying %s not written " 2255 "by %s shader\n.", 2256 _mesa_shader_stage_to_string(consumer->Stage), 2257 var->name, 2258 _mesa_shader_stage_to_string(producer->Stage)); 2259 } 2260 } 2261 } 2262 2263 /* Now that validation is done its safe to remove unused varyings. As 2264 * we have both a producer and consumer its safe to remove unused 2265 * varyings even if the program is a SSO because the stages are being 2266 * linked together i.e. we have a multi-stage SSO. 2267 */ 2268 remove_unused_shader_inputs_and_outputs(false, producer, 2269 ir_var_shader_out); 2270 remove_unused_shader_inputs_and_outputs(false, consumer, 2271 ir_var_shader_in); 2272 } 2273 2274 if (producer) { 2275 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out, 2276 0, producer, disable_varying_packing, 2277 xfb_enabled); 2278 } 2279 2280 if (consumer) { 2281 lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in, 2282 consumer_vertices, consumer, 2283 disable_varying_packing, xfb_enabled); 2284 } 2285 2286 return true; 2287 } 2288 2289 bool 2290 check_against_output_limit(struct gl_context *ctx, 2291 struct gl_shader_program *prog, 2292 gl_linked_shader *producer, 2293 unsigned num_explicit_locations) 2294 { 2295 unsigned output_vectors = num_explicit_locations; 2296 2297 foreach_in_list(ir_instruction, node, producer->ir) { 2298 ir_variable *const var = node->as_variable(); 2299 2300 if (var && !var->data.explicit_location && 2301 var->data.mode == ir_var_shader_out && 2302 var_counts_against_varying_limit(producer->Stage, var)) { 2303 /* outputs for fragment shader can't be doubles */ 2304 output_vectors += var->type->count_attribute_slots(false); 2305 } 2306 } 2307 2308 assert(producer->Stage != MESA_SHADER_FRAGMENT); 2309 unsigned max_output_components = 2310 ctx->Const.Program[producer->Stage].MaxOutputComponents; 2311 2312 const unsigned output_components = output_vectors * 4; 2313 if (output_components > max_output_components) { 2314 if (ctx->API == API_OPENGLES2 || prog->IsES) 2315 linker_error(prog, "%s shader uses too many output vectors " 2316 "(%u > %u)\n", 2317 _mesa_shader_stage_to_string(producer->Stage), 2318 output_vectors, 2319 max_output_components / 4); 2320 else 2321 linker_error(prog, "%s shader uses too many output components " 2322 "(%u > %u)\n", 2323 _mesa_shader_stage_to_string(producer->Stage), 2324 output_components, 2325 max_output_components); 2326 2327 return false; 2328 } 2329 2330 return true; 2331 } 2332 2333 bool 2334 check_against_input_limit(struct gl_context *ctx, 2335 struct gl_shader_program *prog, 2336 gl_linked_shader *consumer, 2337 unsigned num_explicit_locations) 2338 { 2339 unsigned input_vectors = num_explicit_locations; 2340 2341 foreach_in_list(ir_instruction, node, consumer->ir) { 2342 ir_variable *const var = node->as_variable(); 2343 2344 if (var && !var->data.explicit_location && 2345 var->data.mode == ir_var_shader_in && 2346 var_counts_against_varying_limit(consumer->Stage, var)) { 2347 /* vertex inputs aren't varying counted */ 2348 input_vectors += var->type->count_attribute_slots(false); 2349 } 2350 } 2351 2352 assert(consumer->Stage != MESA_SHADER_VERTEX); 2353 unsigned max_input_components = 2354 ctx->Const.Program[consumer->Stage].MaxInputComponents; 2355 2356 const unsigned input_components = input_vectors * 4; 2357 if (input_components > max_input_components) { 2358 if (ctx->API == API_OPENGLES2 || prog->IsES) 2359 linker_error(prog, "%s shader uses too many input vectors " 2360 "(%u > %u)\n", 2361 _mesa_shader_stage_to_string(consumer->Stage), 2362 input_vectors, 2363 max_input_components / 4); 2364 else 2365 linker_error(prog, "%s shader uses too many input components " 2366 "(%u > %u)\n", 2367 _mesa_shader_stage_to_string(consumer->Stage), 2368 input_components, 2369 max_input_components); 2370 2371 return false; 2372 } 2373 2374 return true; 2375 } 2376