Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright © 2012 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file link_varyings.cpp
     26  *
     27  * Linker functions related specifically to linking varyings between shader
     28  * stages.
     29  */
     30 
     31 
     32 #include "main/mtypes.h"
     33 #include "glsl_symbol_table.h"
     34 #include "glsl_parser_extras.h"
     35 #include "ir_optimization.h"
     36 #include "linker.h"
     37 #include "link_varyings.h"
     38 #include "main/macros.h"
     39 #include "util/hash_table.h"
     40 #include "program.h"
     41 
     42 
     43 /**
     44  * Get the varying type stripped of the outermost array if we're processing
     45  * a stage whose varyings are arrays indexed by a vertex number (such as
     46  * geometry shader inputs).
     47  */
     48 static const glsl_type *
     49 get_varying_type(const ir_variable *var, gl_shader_stage stage)
     50 {
     51    const glsl_type *type = var->type;
     52 
     53    if (!var->data.patch &&
     54        ((var->data.mode == ir_var_shader_out &&
     55          stage == MESA_SHADER_TESS_CTRL) ||
     56         (var->data.mode == ir_var_shader_in &&
     57          (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
     58           stage == MESA_SHADER_GEOMETRY)))) {
     59       assert(type->is_array());
     60       type = type->fields.array;
     61    }
     62 
     63    return type;
     64 }
     65 
     66 static void
     67 create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
     68                          size_t name_length, unsigned *count,
     69                          const char *ifc_member_name,
     70                          const glsl_type *ifc_member_t, char ***varying_names)
     71 {
     72    if (t->is_interface()) {
     73       size_t new_length = name_length;
     74 
     75       assert(ifc_member_name && ifc_member_t);
     76       ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
     77 
     78       create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
     79                                NULL, NULL, varying_names);
     80    } else if (t->is_record()) {
     81       for (unsigned i = 0; i < t->length; i++) {
     82          const char *field = t->fields.structure[i].name;
     83          size_t new_length = name_length;
     84 
     85          ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
     86 
     87          create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
     88                                   new_length, count, NULL, NULL,
     89                                   varying_names);
     90       }
     91    } else if (t->without_array()->is_record() ||
     92               t->without_array()->is_interface() ||
     93               (t->is_array() && t->fields.array->is_array())) {
     94       for (unsigned i = 0; i < t->length; i++) {
     95          size_t new_length = name_length;
     96 
     97          /* Append the subscript to the current variable name */
     98          ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
     99 
    100          create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
    101                                   count, ifc_member_name, ifc_member_t,
    102                                   varying_names);
    103       }
    104    } else {
    105       (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
    106    }
    107 }
    108 
    109 bool
    110 process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
    111                               unsigned *num_tfeedback_decls,
    112                               char ***varying_names)
    113 {
    114    bool has_xfb_qualifiers = false;
    115 
    116    /* We still need to enable transform feedback mode even if xfb_stride is
    117     * only applied to a global out. Also we don't bother to propagate
    118     * xfb_stride to interface block members so this will catch that case also.
    119     */
    120    for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
    121       if (sh->info.TransformFeedback.BufferStride[j]) {
    122          has_xfb_qualifiers = true;
    123       }
    124    }
    125 
    126    foreach_in_list(ir_instruction, node, sh->ir) {
    127       ir_variable *var = node->as_variable();
    128       if (!var || var->data.mode != ir_var_shader_out)
    129          continue;
    130 
    131       /* From the ARB_enhanced_layouts spec:
    132        *
    133        *    "Any shader making any static use (after preprocessing) of any of
    134        *     these *xfb_* qualifiers will cause the shader to be in a
    135        *     transform feedback capturing mode and hence responsible for
    136        *     describing the transform feedback setup.  This mode will capture
    137        *     any output selected by *xfb_offset*, directly or indirectly, to
    138        *     a transform feedback buffer."
    139        */
    140       if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
    141          has_xfb_qualifiers = true;
    142       }
    143 
    144       if (var->data.explicit_xfb_offset) {
    145          *num_tfeedback_decls += var->type->varying_count();
    146          has_xfb_qualifiers = true;
    147       }
    148    }
    149 
    150    if (*num_tfeedback_decls == 0)
    151       return has_xfb_qualifiers;
    152 
    153    unsigned i = 0;
    154    *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
    155    foreach_in_list(ir_instruction, node, sh->ir) {
    156       ir_variable *var = node->as_variable();
    157       if (!var || var->data.mode != ir_var_shader_out)
    158          continue;
    159 
    160       if (var->data.explicit_xfb_offset) {
    161          char *name;
    162          const glsl_type *type, *member_type;
    163 
    164          if (var->data.from_named_ifc_block) {
    165             type = var->get_interface_type();
    166             /* Find the member type before it was altered by lowering */
    167             member_type =
    168                type->fields.structure[type->field_index(var->name)].type;
    169             name = ralloc_strdup(NULL, type->without_array()->name);
    170          } else {
    171             type = var->type;
    172             member_type = NULL;
    173             name = ralloc_strdup(NULL, var->name);
    174          }
    175          create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
    176                                   var->name, member_type, varying_names);
    177          ralloc_free(name);
    178       }
    179    }
    180 
    181    assert(i == *num_tfeedback_decls);
    182    return has_xfb_qualifiers;
    183 }
    184 
    185 static bool
    186 anonymous_struct_type_matches(const glsl_type *output_type,
    187                               const glsl_type *to_match)
    188 {
    189     while (output_type->is_array() && to_match->is_array()) {
    190         /* if the lengths at each level don't match fail. */
    191         if (output_type->length != to_match->length)
    192             return false;
    193         output_type = output_type->fields.array;
    194         to_match = to_match->fields.array;
    195     }
    196 
    197     if (output_type->is_array() || to_match->is_array())
    198         return false;
    199     return output_type->is_anonymous() &&
    200            to_match->is_anonymous() &&
    201            to_match->record_compare(output_type);
    202 }
    203 
    204 /**
    205  * Validate the types and qualifiers of an output from one stage against the
    206  * matching input to another stage.
    207  */
    208 static void
    209 cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
    210                                     const ir_variable *input,
    211                                     const ir_variable *output,
    212                                     gl_shader_stage consumer_stage,
    213                                     gl_shader_stage producer_stage)
    214 {
    215    /* Check that the types match between stages.
    216     */
    217    const glsl_type *type_to_match = input->type;
    218 
    219    /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
    220    const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
    221                                    consumer_stage != MESA_SHADER_FRAGMENT) ||
    222                                   consumer_stage == MESA_SHADER_GEOMETRY;
    223    if (extra_array_level) {
    224       assert(type_to_match->is_array());
    225       type_to_match = type_to_match->fields.array;
    226    }
    227 
    228    if (type_to_match != output->type) {
    229       /* There is a bit of a special case for gl_TexCoord.  This
    230        * built-in is unsized by default.  Applications that variable
    231        * access it must redeclare it with a size.  There is some
    232        * language in the GLSL spec that implies the fragment shader
    233        * and vertex shader do not have to agree on this size.  Other
    234        * driver behave this way, and one or two applications seem to
    235        * rely on it.
    236        *
    237        * Neither declaration needs to be modified here because the array
    238        * sizes are fixed later when update_array_sizes is called.
    239        *
    240        * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
    241        *
    242        *     "Unlike user-defined varying variables, the built-in
    243        *     varying variables don't have a strict one-to-one
    244        *     correspondence between the vertex language and the
    245        *     fragment language."
    246        */
    247       if (!output->type->is_array() || !is_gl_identifier(output->name)) {
    248          bool anon_matches = anonymous_struct_type_matches(output->type, type_to_match);
    249 
    250          if (!anon_matches) {
    251             linker_error(prog,
    252                          "%s shader output `%s' declared as type `%s', "
    253                          "but %s shader input declared as type `%s'\n",
    254                          _mesa_shader_stage_to_string(producer_stage),
    255                          output->name,
    256                          output->type->name,
    257                          _mesa_shader_stage_to_string(consumer_stage),
    258                          input->type->name);
    259             return;
    260          }
    261       }
    262    }
    263 
    264    /* Check that all of the qualifiers match between stages.
    265     */
    266 
    267    /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
    268     * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
    269     * conformance test suite does not verify that the qualifiers must match.
    270     * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
    271     * OpenGLES 3.0 drivers, so we relax the checking in all cases.
    272     */
    273    if (false /* always skip the centroid check */ &&
    274        prog->data->Version < (prog->IsES ? 310 : 430) &&
    275        input->data.centroid != output->data.centroid) {
    276       linker_error(prog,
    277                    "%s shader output `%s' %s centroid qualifier, "
    278                    "but %s shader input %s centroid qualifier\n",
    279                    _mesa_shader_stage_to_string(producer_stage),
    280                    output->name,
    281                    (output->data.centroid) ? "has" : "lacks",
    282                    _mesa_shader_stage_to_string(consumer_stage),
    283                    (input->data.centroid) ? "has" : "lacks");
    284       return;
    285    }
    286 
    287    if (input->data.sample != output->data.sample) {
    288       linker_error(prog,
    289                    "%s shader output `%s' %s sample qualifier, "
    290                    "but %s shader input %s sample qualifier\n",
    291                    _mesa_shader_stage_to_string(producer_stage),
    292                    output->name,
    293                    (output->data.sample) ? "has" : "lacks",
    294                    _mesa_shader_stage_to_string(consumer_stage),
    295                    (input->data.sample) ? "has" : "lacks");
    296       return;
    297    }
    298 
    299    if (input->data.patch != output->data.patch) {
    300       linker_error(prog,
    301                    "%s shader output `%s' %s patch qualifier, "
    302                    "but %s shader input %s patch qualifier\n",
    303                    _mesa_shader_stage_to_string(producer_stage),
    304                    output->name,
    305                    (output->data.patch) ? "has" : "lacks",
    306                    _mesa_shader_stage_to_string(consumer_stage),
    307                    (input->data.patch) ? "has" : "lacks");
    308       return;
    309    }
    310 
    311    /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
    312     *
    313     *    "As only outputs need be declared with invariant, an output from
    314     *     one shader stage will still match an input of a subsequent stage
    315     *     without the input being declared as invariant."
    316     *
    317     * while GLSL 4.20 says:
    318     *
    319     *    "For variables leaving one shader and coming into another shader,
    320     *     the invariant keyword has to be used in both shaders, or a link
    321     *     error will result."
    322     *
    323     * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
    324     *
    325     *    "The invariance of varyings that are declared in both the vertex
    326     *     and fragment shaders must match."
    327     */
    328    if (input->data.invariant != output->data.invariant &&
    329        prog->data->Version < (prog->IsES ? 300 : 430)) {
    330       linker_error(prog,
    331                    "%s shader output `%s' %s invariant qualifier, "
    332                    "but %s shader input %s invariant qualifier\n",
    333                    _mesa_shader_stage_to_string(producer_stage),
    334                    output->name,
    335                    (output->data.invariant) ? "has" : "lacks",
    336                    _mesa_shader_stage_to_string(consumer_stage),
    337                    (input->data.invariant) ? "has" : "lacks");
    338       return;
    339    }
    340 
    341    /* GLSL >= 4.40 removes text requiring interpolation qualifiers
    342     * to match cross stage, they must only match within the same stage.
    343     *
    344     * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
    345     *
    346     *     "It is a link-time error if, within the same stage, the interpolation
    347     *     qualifiers of variables of the same name do not match.
    348     *
    349     */
    350    if (input->data.interpolation != output->data.interpolation &&
    351        prog->data->Version < 440) {
    352       linker_error(prog,
    353                    "%s shader output `%s' specifies %s "
    354                    "interpolation qualifier, "
    355                    "but %s shader input specifies %s "
    356                    "interpolation qualifier\n",
    357                    _mesa_shader_stage_to_string(producer_stage),
    358                    output->name,
    359                    interpolation_string(output->data.interpolation),
    360                    _mesa_shader_stage_to_string(consumer_stage),
    361                    interpolation_string(input->data.interpolation));
    362       return;
    363    }
    364 }
    365 
    366 /**
    367  * Validate front and back color outputs against single color input
    368  */
    369 static void
    370 cross_validate_front_and_back_color(struct gl_shader_program *prog,
    371                                     const ir_variable *input,
    372                                     const ir_variable *front_color,
    373                                     const ir_variable *back_color,
    374                                     gl_shader_stage consumer_stage,
    375                                     gl_shader_stage producer_stage)
    376 {
    377    if (front_color != NULL && front_color->data.assigned)
    378       cross_validate_types_and_qualifiers(prog, input, front_color,
    379                                           consumer_stage, producer_stage);
    380 
    381    if (back_color != NULL && back_color->data.assigned)
    382       cross_validate_types_and_qualifiers(prog, input, back_color,
    383                                           consumer_stage, producer_stage);
    384 }
    385 
    386 /**
    387  * Validate that outputs from one stage match inputs of another
    388  */
    389 void
    390 cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
    391                                  gl_linked_shader *producer,
    392                                  gl_linked_shader *consumer)
    393 {
    394    glsl_symbol_table parameters;
    395    ir_variable *explicit_locations[MAX_VARYINGS_INCL_PATCH][4] =
    396       { {NULL, NULL} };
    397 
    398    /* Find all shader outputs in the "producer" stage.
    399     */
    400    foreach_in_list(ir_instruction, node, producer->ir) {
    401       ir_variable *const var = node->as_variable();
    402 
    403       if (var == NULL || var->data.mode != ir_var_shader_out)
    404          continue;
    405 
    406       if (!var->data.explicit_location
    407           || var->data.location < VARYING_SLOT_VAR0)
    408          parameters.add_variable(var);
    409       else {
    410          /* User-defined varyings with explicit locations are handled
    411           * differently because they do not need to have matching names.
    412           */
    413          const glsl_type *type = get_varying_type(var, producer->Stage);
    414          unsigned num_elements = type->count_attribute_slots(false);
    415          unsigned idx = var->data.location - VARYING_SLOT_VAR0;
    416          unsigned slot_limit = idx + num_elements;
    417          unsigned last_comp;
    418 
    419          if (type->without_array()->is_record()) {
    420             /* The component qualifier can't be used on structs so just treat
    421              * all component slots as used.
    422              */
    423             last_comp = 4;
    424          } else {
    425             unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
    426             last_comp = var->data.location_frac +
    427                type->without_array()->vector_elements * dmul;
    428          }
    429 
    430          while (idx < slot_limit) {
    431             unsigned i = var->data.location_frac;
    432             while (i < last_comp) {
    433                if (explicit_locations[idx][i] != NULL) {
    434                   linker_error(prog,
    435                                "%s shader has multiple outputs explicitly "
    436                                "assigned to location %d and component %d\n",
    437                                _mesa_shader_stage_to_string(producer->Stage),
    438                                idx, var->data.location_frac);
    439                   return;
    440                }
    441 
    442                /* Make sure all component at this location have the same type.
    443                 */
    444                for (unsigned j = 0; j < 4; j++) {
    445                   if (explicit_locations[idx][j] &&
    446                       (explicit_locations[idx][j]->type->without_array()
    447                        ->base_type != type->without_array()->base_type)) {
    448                      linker_error(prog,
    449                                   "Varyings sharing the same location must "
    450                                   "have the same underlying numerical type. "
    451                                   "Location %u component %u\n", idx,
    452                                   var->data.location_frac);
    453                      return;
    454                   }
    455                }
    456 
    457                explicit_locations[idx][i] = var;
    458                i++;
    459 
    460                /* We need to do some special handling for doubles as dvec3 and
    461                 * dvec4 consume two consecutive locations. We don't need to
    462                 * worry about components beginning at anything other than 0 as
    463                 * the spec does not allow this for dvec3 and dvec4.
    464                 */
    465                if (i == 4 && last_comp > 4) {
    466                   last_comp = last_comp - 4;
    467                   /* Bump location index and reset the component index */
    468                   idx++;
    469                   i = 0;
    470                }
    471             }
    472             idx++;
    473          }
    474       }
    475    }
    476 
    477 
    478    /* Find all shader inputs in the "consumer" stage.  Any variables that have
    479     * matching outputs already in the symbol table must have the same type and
    480     * qualifiers.
    481     *
    482     * Exception: if the consumer is the geometry shader, then the inputs
    483     * should be arrays and the type of the array element should match the type
    484     * of the corresponding producer output.
    485     */
    486    foreach_in_list(ir_instruction, node, consumer->ir) {
    487       ir_variable *const input = node->as_variable();
    488 
    489       if (input == NULL || input->data.mode != ir_var_shader_in)
    490          continue;
    491 
    492       if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
    493          const ir_variable *const front_color =
    494             parameters.get_variable("gl_FrontColor");
    495 
    496          const ir_variable *const back_color =
    497             parameters.get_variable("gl_BackColor");
    498 
    499          cross_validate_front_and_back_color(prog, input,
    500                                              front_color, back_color,
    501                                              consumer->Stage, producer->Stage);
    502       } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
    503          const ir_variable *const front_color =
    504             parameters.get_variable("gl_FrontSecondaryColor");
    505 
    506          const ir_variable *const back_color =
    507             parameters.get_variable("gl_BackSecondaryColor");
    508 
    509          cross_validate_front_and_back_color(prog, input,
    510                                              front_color, back_color,
    511                                              consumer->Stage, producer->Stage);
    512       } else {
    513          /* The rules for connecting inputs and outputs change in the presence
    514           * of explicit locations.  In this case, we no longer care about the
    515           * names of the variables.  Instead, we care only about the
    516           * explicitly assigned location.
    517           */
    518          ir_variable *output = NULL;
    519          if (input->data.explicit_location
    520              && input->data.location >= VARYING_SLOT_VAR0) {
    521 
    522             const glsl_type *type = get_varying_type(input, consumer->Stage);
    523             unsigned num_elements = type->count_attribute_slots(false);
    524             unsigned idx = input->data.location - VARYING_SLOT_VAR0;
    525             unsigned slot_limit = idx + num_elements;
    526 
    527             while (idx < slot_limit) {
    528                output = explicit_locations[idx][input->data.location_frac];
    529 
    530                if (output == NULL ||
    531                    input->data.location != output->data.location) {
    532                   linker_error(prog,
    533                                "%s shader input `%s' with explicit location "
    534                                "has no matching output\n",
    535                                _mesa_shader_stage_to_string(consumer->Stage),
    536                                input->name);
    537                   break;
    538                }
    539                idx++;
    540             }
    541          } else {
    542             output = parameters.get_variable(input->name);
    543          }
    544 
    545          if (output != NULL) {
    546             /* Interface blocks have their own validation elsewhere so don't
    547              * try validating them here.
    548              */
    549             if (!(input->get_interface_type() &&
    550                   output->get_interface_type()))
    551                cross_validate_types_and_qualifiers(prog, input, output,
    552                                                    consumer->Stage,
    553                                                    producer->Stage);
    554          } else {
    555             /* Check for input vars with unmatched output vars in prev stage
    556              * taking into account that interface blocks could have a matching
    557              * output but with different name, so we ignore them.
    558              */
    559             assert(!input->data.assigned);
    560             if (input->data.used && !input->get_interface_type() &&
    561                 !input->data.explicit_location && !prog->SeparateShader)
    562                linker_error(prog,
    563                             "%s shader input `%s' "
    564                             "has no matching output in the previous stage\n",
    565                             _mesa_shader_stage_to_string(consumer->Stage),
    566                             input->name);
    567          }
    568       }
    569    }
    570 }
    571 
    572 /**
    573  * Demote shader inputs and outputs that are not used in other stages, and
    574  * remove them via dead code elimination.
    575  */
    576 void
    577 remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
    578                                         gl_linked_shader *sh,
    579                                         enum ir_variable_mode mode)
    580 {
    581    if (is_separate_shader_object)
    582       return;
    583 
    584    foreach_in_list(ir_instruction, node, sh->ir) {
    585       ir_variable *const var = node->as_variable();
    586 
    587       if (var == NULL || var->data.mode != int(mode))
    588          continue;
    589 
    590       /* A shader 'in' or 'out' variable is only really an input or output if
    591        * its value is used by other shader stages. This will cause the
    592        * variable to have a location assigned.
    593        */
    594       if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
    595          assert(var->data.mode != ir_var_temporary);
    596 
    597          /* Assign zeros to demoted inputs to allow more optimizations. */
    598          if (var->data.mode == ir_var_shader_in && !var->constant_value)
    599             var->constant_value = ir_constant::zero(var, var->type);
    600 
    601          var->data.mode = ir_var_auto;
    602       }
    603    }
    604 
    605    /* Eliminate code that is now dead due to unused inputs/outputs being
    606     * demoted.
    607     */
    608    while (do_dead_code(sh->ir, false))
    609       ;
    610 
    611 }
    612 
    613 /**
    614  * Initialize this object based on a string that was passed to
    615  * glTransformFeedbackVaryings.
    616  *
    617  * If the input is mal-formed, this call still succeeds, but it sets
    618  * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
    619  * will fail to find any matching variable.
    620  */
    621 void
    622 tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
    623                      const char *input)
    624 {
    625    /* We don't have to be pedantic about what is a valid GLSL variable name,
    626     * because any variable with an invalid name can't exist in the IR anyway.
    627     */
    628 
    629    this->location = -1;
    630    this->orig_name = input;
    631    this->lowered_builtin_array_variable = none;
    632    this->skip_components = 0;
    633    this->next_buffer_separator = false;
    634    this->matched_candidate = NULL;
    635    this->stream_id = 0;
    636    this->buffer = 0;
    637    this->offset = 0;
    638 
    639    if (ctx->Extensions.ARB_transform_feedback3) {
    640       /* Parse gl_NextBuffer. */
    641       if (strcmp(input, "gl_NextBuffer") == 0) {
    642          this->next_buffer_separator = true;
    643          return;
    644       }
    645 
    646       /* Parse gl_SkipComponents. */
    647       if (strcmp(input, "gl_SkipComponents1") == 0)
    648          this->skip_components = 1;
    649       else if (strcmp(input, "gl_SkipComponents2") == 0)
    650          this->skip_components = 2;
    651       else if (strcmp(input, "gl_SkipComponents3") == 0)
    652          this->skip_components = 3;
    653       else if (strcmp(input, "gl_SkipComponents4") == 0)
    654          this->skip_components = 4;
    655 
    656       if (this->skip_components)
    657          return;
    658    }
    659 
    660    /* Parse a declaration. */
    661    const char *base_name_end;
    662    long subscript = parse_program_resource_name(input, &base_name_end);
    663    this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
    664    if (this->var_name == NULL) {
    665       _mesa_error_no_memory(__func__);
    666       return;
    667    }
    668 
    669    if (subscript >= 0) {
    670       this->array_subscript = subscript;
    671       this->is_subscripted = true;
    672    } else {
    673       this->is_subscripted = false;
    674    }
    675 
    676    /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
    677     * class must behave specially to account for the fact that gl_ClipDistance
    678     * is converted from a float[8] to a vec4[2].
    679     */
    680    if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
    681        strcmp(this->var_name, "gl_ClipDistance") == 0) {
    682       this->lowered_builtin_array_variable = clip_distance;
    683    }
    684    if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
    685        strcmp(this->var_name, "gl_CullDistance") == 0) {
    686       this->lowered_builtin_array_variable = cull_distance;
    687    }
    688 
    689    if (ctx->Const.LowerTessLevel &&
    690        (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
    691       this->lowered_builtin_array_variable = tess_level_outer;
    692    if (ctx->Const.LowerTessLevel &&
    693        (strcmp(this->var_name, "gl_TessLevelInner") == 0))
    694       this->lowered_builtin_array_variable = tess_level_inner;
    695 }
    696 
    697 
    698 /**
    699  * Determine whether two tfeedback_decl objects refer to the same variable and
    700  * array index (if applicable).
    701  */
    702 bool
    703 tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
    704 {
    705    assert(x.is_varying() && y.is_varying());
    706 
    707    if (strcmp(x.var_name, y.var_name) != 0)
    708       return false;
    709    if (x.is_subscripted != y.is_subscripted)
    710       return false;
    711    if (x.is_subscripted && x.array_subscript != y.array_subscript)
    712       return false;
    713    return true;
    714 }
    715 
    716 
    717 /**
    718  * Assign a location and stream ID for this tfeedback_decl object based on the
    719  * transform feedback candidate found by find_candidate.
    720  *
    721  * If an error occurs, the error is reported through linker_error() and false
    722  * is returned.
    723  */
    724 bool
    725 tfeedback_decl::assign_location(struct gl_context *ctx,
    726                                 struct gl_shader_program *prog)
    727 {
    728    assert(this->is_varying());
    729 
    730    unsigned fine_location
    731       = this->matched_candidate->toplevel_var->data.location * 4
    732       + this->matched_candidate->toplevel_var->data.location_frac
    733       + this->matched_candidate->offset;
    734    const unsigned dmul =
    735       this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
    736 
    737    if (this->matched_candidate->type->is_array()) {
    738       /* Array variable */
    739       const unsigned matrix_cols =
    740          this->matched_candidate->type->fields.array->matrix_columns;
    741       const unsigned vector_elements =
    742          this->matched_candidate->type->fields.array->vector_elements;
    743       unsigned actual_array_size;
    744       switch (this->lowered_builtin_array_variable) {
    745       case clip_distance:
    746          actual_array_size = prog->LastClipDistanceArraySize;
    747          break;
    748       case cull_distance:
    749          actual_array_size = prog->LastCullDistanceArraySize;
    750          break;
    751       case tess_level_outer:
    752          actual_array_size = 4;
    753          break;
    754       case tess_level_inner:
    755          actual_array_size = 2;
    756          break;
    757       case none:
    758       default:
    759          actual_array_size = this->matched_candidate->type->array_size();
    760          break;
    761       }
    762 
    763       if (this->is_subscripted) {
    764          /* Check array bounds. */
    765          if (this->array_subscript >= actual_array_size) {
    766             linker_error(prog, "Transform feedback varying %s has index "
    767                          "%i, but the array size is %u.",
    768                          this->orig_name, this->array_subscript,
    769                          actual_array_size);
    770             return false;
    771          }
    772          unsigned array_elem_size = this->lowered_builtin_array_variable ?
    773             1 : vector_elements * matrix_cols * dmul;
    774          fine_location += array_elem_size * this->array_subscript;
    775          this->size = 1;
    776       } else {
    777          this->size = actual_array_size;
    778       }
    779       this->vector_elements = vector_elements;
    780       this->matrix_columns = matrix_cols;
    781       if (this->lowered_builtin_array_variable)
    782          this->type = GL_FLOAT;
    783       else
    784          this->type = this->matched_candidate->type->fields.array->gl_type;
    785    } else {
    786       /* Regular variable (scalar, vector, or matrix) */
    787       if (this->is_subscripted) {
    788          linker_error(prog, "Transform feedback varying %s requested, "
    789                       "but %s is not an array.",
    790                       this->orig_name, this->var_name);
    791          return false;
    792       }
    793       this->size = 1;
    794       this->vector_elements = this->matched_candidate->type->vector_elements;
    795       this->matrix_columns = this->matched_candidate->type->matrix_columns;
    796       this->type = this->matched_candidate->type->gl_type;
    797    }
    798    this->location = fine_location / 4;
    799    this->location_frac = fine_location % 4;
    800 
    801    /* From GL_EXT_transform_feedback:
    802     *   A program will fail to link if:
    803     *
    804     *   * the total number of components to capture in any varying
    805     *     variable in <varyings> is greater than the constant
    806     *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    807     *     buffer mode is SEPARATE_ATTRIBS_EXT;
    808     */
    809    if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
    810        this->num_components() >
    811        ctx->Const.MaxTransformFeedbackSeparateComponents) {
    812       linker_error(prog, "Transform feedback varying %s exceeds "
    813                    "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
    814                    this->orig_name);
    815       return false;
    816    }
    817 
    818    /* Only transform feedback varyings can be assigned to non-zero streams,
    819     * so assign the stream id here.
    820     */
    821    this->stream_id = this->matched_candidate->toplevel_var->data.stream;
    822 
    823    unsigned array_offset = this->array_subscript * 4 * dmul;
    824    unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
    825    this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
    826    this->offset = this->matched_candidate->toplevel_var->data.offset +
    827       array_offset + struct_offset;
    828 
    829    return true;
    830 }
    831 
    832 
    833 unsigned
    834 tfeedback_decl::get_num_outputs() const
    835 {
    836    if (!this->is_varying()) {
    837       return 0;
    838    }
    839    return (this->num_components() + this->location_frac + 3)/4;
    840 }
    841 
    842 
    843 /**
    844  * Update gl_transform_feedback_info to reflect this tfeedback_decl.
    845  *
    846  * If an error occurs, the error is reported through linker_error() and false
    847  * is returned.
    848  */
    849 bool
    850 tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
    851                       struct gl_transform_feedback_info *info,
    852                       unsigned buffer, unsigned buffer_index,
    853                       const unsigned max_outputs, bool *explicit_stride,
    854                       bool has_xfb_qualifiers) const
    855 {
    856    unsigned xfb_offset = 0;
    857    unsigned size = this->size;
    858    /* Handle gl_SkipComponents. */
    859    if (this->skip_components) {
    860       info->Buffers[buffer].Stride += this->skip_components;
    861       size = this->skip_components;
    862       goto store_varying;
    863    }
    864 
    865    if (this->next_buffer_separator) {
    866       size = 0;
    867       goto store_varying;
    868    }
    869 
    870    if (has_xfb_qualifiers) {
    871       xfb_offset = this->offset / 4;
    872    } else {
    873       xfb_offset = info->Buffers[buffer].Stride;
    874    }
    875    info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
    876 
    877    {
    878       unsigned location = this->location;
    879       unsigned location_frac = this->location_frac;
    880       unsigned num_components = this->num_components();
    881       while (num_components > 0) {
    882          unsigned output_size = MIN2(num_components, 4 - location_frac);
    883          assert((info->NumOutputs == 0 && max_outputs == 0) ||
    884                 info->NumOutputs < max_outputs);
    885 
    886          /* From the ARB_enhanced_layouts spec:
    887           *
    888           *    "If such a block member or variable is not written during a shader
    889           *    invocation, the buffer contents at the assigned offset will be
    890           *    undefined.  Even if there are no static writes to a variable or
    891           *    member that is assigned a transform feedback offset, the space is
    892           *    still allocated in the buffer and still affects the stride."
    893           */
    894          if (this->is_varying_written()) {
    895             info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
    896             info->Outputs[info->NumOutputs].OutputRegister = location;
    897             info->Outputs[info->NumOutputs].NumComponents = output_size;
    898             info->Outputs[info->NumOutputs].StreamId = stream_id;
    899             info->Outputs[info->NumOutputs].OutputBuffer = buffer;
    900             info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
    901             ++info->NumOutputs;
    902          }
    903          info->Buffers[buffer].Stream = this->stream_id;
    904          xfb_offset += output_size;
    905 
    906          num_components -= output_size;
    907          location++;
    908          location_frac = 0;
    909       }
    910    }
    911 
    912    if (explicit_stride && explicit_stride[buffer]) {
    913       if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
    914          linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
    915                       "multiple of 8 as its applied to a type that is or "
    916                       "contains a double.",
    917                       info->Buffers[buffer].Stride * 4);
    918          return false;
    919       }
    920 
    921       if ((this->offset / 4) / info->Buffers[buffer].Stride !=
    922           (xfb_offset - 1) / info->Buffers[buffer].Stride) {
    923          linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
    924                       "buffer (%d)", xfb_offset * 4,
    925                       info->Buffers[buffer].Stride * 4, buffer);
    926          return false;
    927       }
    928    } else {
    929       info->Buffers[buffer].Stride = xfb_offset;
    930    }
    931 
    932    /* From GL_EXT_transform_feedback:
    933     *   A program will fail to link if:
    934     *
    935     *     * the total number of components to capture is greater than
    936     *       the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
    937     *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
    938     *
    939     * From GL_ARB_enhanced_layouts:
    940     *
    941     *   "The resulting stride (implicit or explicit) must be less than or
    942     *   equal to the implementation-dependent constant
    943     *   gl_MaxTransformFeedbackInterleavedComponents."
    944     */
    945    if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
    946         has_xfb_qualifiers) &&
    947        info->Buffers[buffer].Stride >
    948        ctx->Const.MaxTransformFeedbackInterleavedComponents) {
    949       linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
    950                    "limit has been exceeded.");
    951       return false;
    952    }
    953 
    954  store_varying:
    955    info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
    956                                                          this->orig_name);
    957    info->Varyings[info->NumVarying].Type = this->type;
    958    info->Varyings[info->NumVarying].Size = size;
    959    info->Varyings[info->NumVarying].BufferIndex = buffer_index;
    960    info->NumVarying++;
    961    info->Buffers[buffer].NumVaryings++;
    962 
    963    return true;
    964 }
    965 
    966 
    967 const tfeedback_candidate *
    968 tfeedback_decl::find_candidate(gl_shader_program *prog,
    969                                hash_table *tfeedback_candidates)
    970 {
    971    const char *name = this->var_name;
    972    switch (this->lowered_builtin_array_variable) {
    973    case none:
    974       name = this->var_name;
    975       break;
    976    case clip_distance:
    977       name = "gl_ClipDistanceMESA";
    978       break;
    979    case cull_distance:
    980       name = "gl_CullDistanceMESA";
    981       break;
    982    case tess_level_outer:
    983       name = "gl_TessLevelOuterMESA";
    984       break;
    985    case tess_level_inner:
    986       name = "gl_TessLevelInnerMESA";
    987       break;
    988    }
    989    hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
    990 
    991    this->matched_candidate = entry ?
    992          (const tfeedback_candidate *) entry->data : NULL;
    993 
    994    if (!this->matched_candidate) {
    995       /* From GL_EXT_transform_feedback:
    996        *   A program will fail to link if:
    997        *
    998        *   * any variable name specified in the <varyings> array is not
    999        *     declared as an output in the geometry shader (if present) or
   1000        *     the vertex shader (if no geometry shader is present);
   1001        */
   1002       linker_error(prog, "Transform feedback varying %s undeclared.",
   1003                    this->orig_name);
   1004    }
   1005 
   1006    return this->matched_candidate;
   1007 }
   1008 
   1009 
   1010 /**
   1011  * Parse all the transform feedback declarations that were passed to
   1012  * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
   1013  *
   1014  * If an error occurs, the error is reported through linker_error() and false
   1015  * is returned.
   1016  */
   1017 bool
   1018 parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
   1019                       const void *mem_ctx, unsigned num_names,
   1020                       char **varying_names, tfeedback_decl *decls)
   1021 {
   1022    for (unsigned i = 0; i < num_names; ++i) {
   1023       decls[i].init(ctx, mem_ctx, varying_names[i]);
   1024 
   1025       if (!decls[i].is_varying())
   1026          continue;
   1027 
   1028       /* From GL_EXT_transform_feedback:
   1029        *   A program will fail to link if:
   1030        *
   1031        *   * any two entries in the <varyings> array specify the same varying
   1032        *     variable;
   1033        *
   1034        * We interpret this to mean "any two entries in the <varyings> array
   1035        * specify the same varying variable and array index", since transform
   1036        * feedback of arrays would be useless otherwise.
   1037        */
   1038       for (unsigned j = 0; j < i; ++j) {
   1039          if (!decls[j].is_varying())
   1040             continue;
   1041 
   1042          if (tfeedback_decl::is_same(decls[i], decls[j])) {
   1043             linker_error(prog, "Transform feedback varying %s specified "
   1044                          "more than once.", varying_names[i]);
   1045             return false;
   1046          }
   1047       }
   1048    }
   1049    return true;
   1050 }
   1051 
   1052 
   1053 static int
   1054 cmp_xfb_offset(const void * x_generic, const void * y_generic)
   1055 {
   1056    tfeedback_decl *x = (tfeedback_decl *) x_generic;
   1057    tfeedback_decl *y = (tfeedback_decl *) y_generic;
   1058 
   1059    if (x->get_buffer() != y->get_buffer())
   1060       return x->get_buffer() - y->get_buffer();
   1061    return x->get_offset() - y->get_offset();
   1062 }
   1063 
   1064 /**
   1065  * Store transform feedback location assignments into
   1066  * prog->sh.LinkedTransformFeedback based on the data stored in
   1067  * tfeedback_decls.
   1068  *
   1069  * If an error occurs, the error is reported through linker_error() and false
   1070  * is returned.
   1071  */
   1072 bool
   1073 store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
   1074                      unsigned num_tfeedback_decls,
   1075                      tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
   1076 {
   1077    /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
   1078     * tracking the number of buffers doesn't overflow.
   1079     */
   1080    assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
   1081 
   1082    bool separate_attribs_mode =
   1083       prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
   1084 
   1085    struct gl_program *xfb_prog = prog->xfb_program;
   1086    xfb_prog->sh.LinkedTransformFeedback =
   1087       rzalloc(xfb_prog, struct gl_transform_feedback_info);
   1088 
   1089    /* The xfb_offset qualifier does not have to be used in increasing order
   1090     * however some drivers expect to receive the list of transform feedback
   1091     * declarations in order so sort it now for convenience.
   1092     */
   1093    if (has_xfb_qualifiers)
   1094       qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
   1095             cmp_xfb_offset);
   1096 
   1097    xfb_prog->sh.LinkedTransformFeedback->Varyings =
   1098       rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
   1099                     num_tfeedback_decls);
   1100 
   1101    unsigned num_outputs = 0;
   1102    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
   1103       if (tfeedback_decls[i].is_varying_written())
   1104          num_outputs += tfeedback_decls[i].get_num_outputs();
   1105    }
   1106 
   1107    xfb_prog->sh.LinkedTransformFeedback->Outputs =
   1108       rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
   1109                     num_outputs);
   1110 
   1111    unsigned num_buffers = 0;
   1112    unsigned buffers = 0;
   1113 
   1114    if (!has_xfb_qualifiers && separate_attribs_mode) {
   1115       /* GL_SEPARATE_ATTRIBS */
   1116       for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
   1117          if (!tfeedback_decls[i].store(ctx, prog,
   1118                                        xfb_prog->sh.LinkedTransformFeedback,
   1119                                        num_buffers, num_buffers, num_outputs,
   1120                                        NULL, has_xfb_qualifiers))
   1121             return false;
   1122 
   1123          buffers |= 1 << num_buffers;
   1124          num_buffers++;
   1125       }
   1126    }
   1127    else {
   1128       /* GL_INVERLEAVED_ATTRIBS */
   1129       int buffer_stream_id = -1;
   1130       unsigned buffer =
   1131          num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
   1132       bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
   1133 
   1134       /* Apply any xfb_stride global qualifiers */
   1135       if (has_xfb_qualifiers) {
   1136          for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
   1137             if (prog->TransformFeedback.BufferStride[j]) {
   1138                buffers |= 1 << j;
   1139                explicit_stride[j] = true;
   1140                xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
   1141                   prog->TransformFeedback.BufferStride[j] / 4;
   1142             }
   1143          }
   1144       }
   1145 
   1146       for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
   1147          if (has_xfb_qualifiers &&
   1148              buffer != tfeedback_decls[i].get_buffer()) {
   1149             /* we have moved to the next buffer so reset stream id */
   1150             buffer_stream_id = -1;
   1151             num_buffers++;
   1152          }
   1153 
   1154          if (tfeedback_decls[i].is_next_buffer_separator()) {
   1155             if (!tfeedback_decls[i].store(ctx, prog,
   1156                                           xfb_prog->sh.LinkedTransformFeedback,
   1157                                           buffer, num_buffers, num_outputs,
   1158                                           explicit_stride, has_xfb_qualifiers))
   1159                return false;
   1160             num_buffers++;
   1161             buffer_stream_id = -1;
   1162             continue;
   1163          } else if (tfeedback_decls[i].is_varying()) {
   1164             if (buffer_stream_id == -1)  {
   1165                /* First varying writing to this buffer: remember its stream */
   1166                buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
   1167             } else if (buffer_stream_id !=
   1168                        (int) tfeedback_decls[i].get_stream_id()) {
   1169                /* Varying writes to the same buffer from a different stream */
   1170                linker_error(prog,
   1171                             "Transform feedback can't capture varyings belonging "
   1172                             "to different vertex streams in a single buffer. "
   1173                             "Varying %s writes to buffer from stream %u, other "
   1174                             "varyings in the same buffer write from stream %u.",
   1175                             tfeedback_decls[i].name(),
   1176                             tfeedback_decls[i].get_stream_id(),
   1177                             buffer_stream_id);
   1178                return false;
   1179             }
   1180          }
   1181 
   1182          if (has_xfb_qualifiers) {
   1183             buffer = tfeedback_decls[i].get_buffer();
   1184          } else {
   1185             buffer = num_buffers;
   1186          }
   1187          buffers |= 1 << buffer;
   1188 
   1189          if (!tfeedback_decls[i].store(ctx, prog,
   1190                                        xfb_prog->sh.LinkedTransformFeedback,
   1191                                        buffer, num_buffers, num_outputs,
   1192                                        explicit_stride, has_xfb_qualifiers))
   1193             return false;
   1194       }
   1195    }
   1196 
   1197    assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
   1198 
   1199    xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   1200    return true;
   1201 }
   1202 
   1203 namespace {
   1204 
   1205 /**
   1206  * Data structure recording the relationship between outputs of one shader
   1207  * stage (the "producer") and inputs of another (the "consumer").
   1208  */
   1209 class varying_matches
   1210 {
   1211 public:
   1212    varying_matches(bool disable_varying_packing, bool xfb_enabled,
   1213                    gl_shader_stage producer_stage,
   1214                    gl_shader_stage consumer_stage);
   1215    ~varying_matches();
   1216    void record(ir_variable *producer_var, ir_variable *consumer_var);
   1217    unsigned assign_locations(struct gl_shader_program *prog,
   1218                              uint8_t *components,
   1219                              uint64_t reserved_slots);
   1220    void store_locations() const;
   1221 
   1222 private:
   1223    bool is_varying_packing_safe(const glsl_type *type,
   1224                                 const ir_variable *var);
   1225 
   1226    /**
   1227     * If true, this driver disables varying packing, so all varyings need to
   1228     * be aligned on slot boundaries, and take up a number of slots equal to
   1229     * their number of matrix columns times their array size.
   1230     *
   1231     * Packing may also be disabled because our current packing method is not
   1232     * safe in SSO or versions of OpenGL where interpolation qualifiers are not
   1233     * guaranteed to match across stages.
   1234     */
   1235    const bool disable_varying_packing;
   1236 
   1237    /**
   1238     * If true, this driver has transform feedback enabled. The transform
   1239     * feedback code requires at least some packing be done even when varying
   1240     * packing is disabled, fortunately where transform feedback requires
   1241     * packing it's safe to override the disabled setting. See
   1242     * is_varying_packing_safe().
   1243     */
   1244    const bool xfb_enabled;
   1245 
   1246    /**
   1247     * Enum representing the order in which varyings are packed within a
   1248     * packing class.
   1249     *
   1250     * Currently we pack vec4's first, then vec2's, then scalar values, then
   1251     * vec3's.  This order ensures that the only vectors that are at risk of
   1252     * having to be "double parked" (split between two adjacent varying slots)
   1253     * are the vec3's.
   1254     */
   1255    enum packing_order_enum {
   1256       PACKING_ORDER_VEC4,
   1257       PACKING_ORDER_VEC2,
   1258       PACKING_ORDER_SCALAR,
   1259       PACKING_ORDER_VEC3,
   1260    };
   1261 
   1262    static unsigned compute_packing_class(const ir_variable *var);
   1263    static packing_order_enum compute_packing_order(const ir_variable *var);
   1264    static int match_comparator(const void *x_generic, const void *y_generic);
   1265    static int xfb_comparator(const void *x_generic, const void *y_generic);
   1266 
   1267    /**
   1268     * Structure recording the relationship between a single producer output
   1269     * and a single consumer input.
   1270     */
   1271    struct match {
   1272       /**
   1273        * Packing class for this varying, computed by compute_packing_class().
   1274        */
   1275       unsigned packing_class;
   1276 
   1277       /**
   1278        * Packing order for this varying, computed by compute_packing_order().
   1279        */
   1280       packing_order_enum packing_order;
   1281       unsigned num_components;
   1282 
   1283       /**
   1284        * The output variable in the producer stage.
   1285        */
   1286       ir_variable *producer_var;
   1287 
   1288       /**
   1289        * The input variable in the consumer stage.
   1290        */
   1291       ir_variable *consumer_var;
   1292 
   1293       /**
   1294        * The location which has been assigned for this varying.  This is
   1295        * expressed in multiples of a float, with the first generic varying
   1296        * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
   1297        * value 0.
   1298        */
   1299       unsigned generic_location;
   1300    } *matches;
   1301 
   1302    /**
   1303     * The number of elements in the \c matches array that are currently in
   1304     * use.
   1305     */
   1306    unsigned num_matches;
   1307 
   1308    /**
   1309     * The number of elements that were set aside for the \c matches array when
   1310     * it was allocated.
   1311     */
   1312    unsigned matches_capacity;
   1313 
   1314    gl_shader_stage producer_stage;
   1315    gl_shader_stage consumer_stage;
   1316 };
   1317 
   1318 } /* anonymous namespace */
   1319 
   1320 varying_matches::varying_matches(bool disable_varying_packing,
   1321                                  bool xfb_enabled,
   1322                                  gl_shader_stage producer_stage,
   1323                                  gl_shader_stage consumer_stage)
   1324    : disable_varying_packing(disable_varying_packing),
   1325      xfb_enabled(xfb_enabled),
   1326      producer_stage(producer_stage),
   1327      consumer_stage(consumer_stage)
   1328 {
   1329    /* Note: this initial capacity is rather arbitrarily chosen to be large
   1330     * enough for many cases without wasting an unreasonable amount of space.
   1331     * varying_matches::record() will resize the array if there are more than
   1332     * this number of varyings.
   1333     */
   1334    this->matches_capacity = 8;
   1335    this->matches = (match *)
   1336       malloc(sizeof(*this->matches) * this->matches_capacity);
   1337    this->num_matches = 0;
   1338 }
   1339 
   1340 
   1341 varying_matches::~varying_matches()
   1342 {
   1343    free(this->matches);
   1344 }
   1345 
   1346 
   1347 /**
   1348  * Packing is always safe on individual arrays, structures, and matrices. It
   1349  * is also safe if the varying is only used for transform feedback.
   1350  */
   1351 bool
   1352 varying_matches::is_varying_packing_safe(const glsl_type *type,
   1353                                          const ir_variable *var)
   1354 {
   1355    if (consumer_stage == MESA_SHADER_TESS_EVAL ||
   1356        consumer_stage == MESA_SHADER_TESS_CTRL ||
   1357        producer_stage == MESA_SHADER_TESS_CTRL)
   1358       return false;
   1359 
   1360    return xfb_enabled && (type->is_array() || type->is_record() ||
   1361                           type->is_matrix() || var->data.is_xfb_only);
   1362 }
   1363 
   1364 
   1365 /**
   1366  * Record the given producer/consumer variable pair in the list of variables
   1367  * that should later be assigned locations.
   1368  *
   1369  * It is permissible for \c consumer_var to be NULL (this happens if a
   1370  * variable is output by the producer and consumed by transform feedback, but
   1371  * not consumed by the consumer).
   1372  *
   1373  * If \c producer_var has already been paired up with a consumer_var, or
   1374  * producer_var is part of fixed pipeline functionality (and hence already has
   1375  * a location assigned), this function has no effect.
   1376  *
   1377  * Note: as a side effect this function may change the interpolation type of
   1378  * \c producer_var, but only when the change couldn't possibly affect
   1379  * rendering.
   1380  */
   1381 void
   1382 varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
   1383 {
   1384    assert(producer_var != NULL || consumer_var != NULL);
   1385 
   1386    if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
   1387        producer_var->data.explicit_location)) ||
   1388        (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
   1389        consumer_var->data.explicit_location))) {
   1390       /* Either a location already exists for this variable (since it is part
   1391        * of fixed functionality), or it has already been recorded as part of a
   1392        * previous match.
   1393        */
   1394       return;
   1395    }
   1396 
   1397    bool needs_flat_qualifier = consumer_var == NULL &&
   1398       (producer_var->type->contains_integer() ||
   1399        producer_var->type->contains_double());
   1400 
   1401    if (!disable_varying_packing &&
   1402        (needs_flat_qualifier ||
   1403         (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT))) {
   1404       /* Since this varying is not being consumed by the fragment shader, its
   1405        * interpolation type varying cannot possibly affect rendering.
   1406        * Also, this variable is non-flat and is (or contains) an integer
   1407        * or a double.
   1408        * If the consumer stage is unknown, don't modify the interpolation
   1409        * type as it could affect rendering later with separate shaders.
   1410        *
   1411        * lower_packed_varyings requires all integer varyings to flat,
   1412        * regardless of where they appear.  We can trivially satisfy that
   1413        * requirement by changing the interpolation type to flat here.
   1414        */
   1415       if (producer_var) {
   1416          producer_var->data.centroid = false;
   1417          producer_var->data.sample = false;
   1418          producer_var->data.interpolation = INTERP_MODE_FLAT;
   1419       }
   1420 
   1421       if (consumer_var) {
   1422          consumer_var->data.centroid = false;
   1423          consumer_var->data.sample = false;
   1424          consumer_var->data.interpolation = INTERP_MODE_FLAT;
   1425       }
   1426    }
   1427 
   1428    if (this->num_matches == this->matches_capacity) {
   1429       this->matches_capacity *= 2;
   1430       this->matches = (match *)
   1431          realloc(this->matches,
   1432                  sizeof(*this->matches) * this->matches_capacity);
   1433    }
   1434 
   1435    /* We must use the consumer to compute the packing class because in GL4.4+
   1436     * there is no guarantee interpolation qualifiers will match across stages.
   1437     *
   1438     * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
   1439     *
   1440     *    "The type and presence of interpolation qualifiers of variables with
   1441     *    the same name declared in all linked shaders for the same cross-stage
   1442     *    interface must match, otherwise the link command will fail.
   1443     *
   1444     *    When comparing an output from one stage to an input of a subsequent
   1445     *    stage, the input and output don't match if their interpolation
   1446     *    qualifiers (or lack thereof) are not the same."
   1447     *
   1448     * This text was also in at least revison 7 of the 4.40 spec but is no
   1449     * longer in revision 9 and not in the 4.50 spec.
   1450     */
   1451    const ir_variable *const var = (consumer_var != NULL)
   1452       ? consumer_var : producer_var;
   1453    const gl_shader_stage stage = (consumer_var != NULL)
   1454       ? consumer_stage : producer_stage;
   1455    const glsl_type *type = get_varying_type(var, stage);
   1456 
   1457    this->matches[this->num_matches].packing_class
   1458       = this->compute_packing_class(var);
   1459    this->matches[this->num_matches].packing_order
   1460       = this->compute_packing_order(var);
   1461    if (this->disable_varying_packing && !is_varying_packing_safe(type, var)) {
   1462       unsigned slots = type->count_attribute_slots(false);
   1463       this->matches[this->num_matches].num_components = slots * 4;
   1464    } else {
   1465       this->matches[this->num_matches].num_components
   1466          = type->component_slots();
   1467    }
   1468    this->matches[this->num_matches].producer_var = producer_var;
   1469    this->matches[this->num_matches].consumer_var = consumer_var;
   1470    this->num_matches++;
   1471    if (producer_var)
   1472       producer_var->data.is_unmatched_generic_inout = 0;
   1473    if (consumer_var)
   1474       consumer_var->data.is_unmatched_generic_inout = 0;
   1475 }
   1476 
   1477 
   1478 /**
   1479  * Choose locations for all of the variable matches that were previously
   1480  * passed to varying_matches::record().
   1481  */
   1482 unsigned
   1483 varying_matches::assign_locations(struct gl_shader_program *prog,
   1484                                   uint8_t *components,
   1485                                   uint64_t reserved_slots)
   1486 {
   1487    /* If packing has been disabled then we cannot safely sort the varyings by
   1488     * class as it may mean we are using a version of OpenGL where
   1489     * interpolation qualifiers are not guaranteed to be matching across
   1490     * shaders, sorting in this case could result in mismatching shader
   1491     * interfaces.
   1492     * When packing is disabled the sort orders varyings used by transform
   1493     * feedback first, but also depends on *undefined behaviour* of qsort to
   1494     * reverse the order of the varyings. See: xfb_comparator().
   1495     */
   1496    if (!this->disable_varying_packing) {
   1497       /* Sort varying matches into an order that makes them easy to pack. */
   1498       qsort(this->matches, this->num_matches, sizeof(*this->matches),
   1499             &varying_matches::match_comparator);
   1500    } else {
   1501       /* Only sort varyings that are only used by transform feedback. */
   1502       qsort(this->matches, this->num_matches, sizeof(*this->matches),
   1503             &varying_matches::xfb_comparator);
   1504    }
   1505 
   1506    unsigned generic_location = 0;
   1507    unsigned generic_patch_location = MAX_VARYING*4;
   1508    bool previous_var_xfb_only = false;
   1509 
   1510    for (unsigned i = 0; i < this->num_matches; i++) {
   1511       unsigned *location = &generic_location;
   1512 
   1513       const ir_variable *var;
   1514       const glsl_type *type;
   1515       bool is_vertex_input = false;
   1516       if (matches[i].consumer_var) {
   1517          var = matches[i].consumer_var;
   1518          type = get_varying_type(var, consumer_stage);
   1519          if (consumer_stage == MESA_SHADER_VERTEX)
   1520             is_vertex_input = true;
   1521       } else {
   1522          var = matches[i].producer_var;
   1523          type = get_varying_type(var, producer_stage);
   1524       }
   1525 
   1526       if (var->data.patch)
   1527          location = &generic_patch_location;
   1528 
   1529       /* Advance to the next slot if this varying has a different packing
   1530        * class than the previous one, and we're not already on a slot
   1531        * boundary.
   1532        *
   1533        * Also advance to the next slot if packing is disabled. This makes sure
   1534        * we don't assign varyings the same locations which is possible
   1535        * because we still pack individual arrays, records and matrices even
   1536        * when packing is disabled. Note we don't advance to the next slot if
   1537        * we can pack varyings together that are only used for transform
   1538        * feedback.
   1539        */
   1540       if ((this->disable_varying_packing &&
   1541            !(previous_var_xfb_only && var->data.is_xfb_only)) ||
   1542           (i > 0 && this->matches[i - 1].packing_class
   1543           != this->matches[i].packing_class )) {
   1544          *location = ALIGN(*location, 4);
   1545       }
   1546 
   1547       previous_var_xfb_only = var->data.is_xfb_only;
   1548 
   1549       /* The number of components taken up by this variable. For vertex shader
   1550        * inputs, we use the number of slots * 4, as they have different
   1551        * counting rules.
   1552        */
   1553       unsigned num_components = is_vertex_input ?
   1554          type->count_attribute_slots(is_vertex_input) * 4 :
   1555          this->matches[i].num_components;
   1556 
   1557       /* The last slot for this variable, inclusive. */
   1558       unsigned slot_end = *location + num_components - 1;
   1559 
   1560       /* FIXME: We could be smarter in the below code and loop back over
   1561        * trying to fill any locations that we skipped because we couldn't pack
   1562        * the varying between an explicit location. For now just let the user
   1563        * hit the linking error if we run out of room and suggest they use
   1564        * explicit locations.
   1565        */
   1566       while (slot_end < MAX_VARYING * 4u) {
   1567          const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
   1568          const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
   1569 
   1570          assert(slots > 0);
   1571          if (reserved_slots & slot_mask) {
   1572             *location = ALIGN(*location + 1, 4);
   1573             slot_end = *location + num_components - 1;
   1574             continue;
   1575          }
   1576 
   1577          break;
   1578       }
   1579 
   1580       if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
   1581          linker_error(prog, "insufficient contiguous locations available for "
   1582                       "%s it is possible an array or struct could not be "
   1583                       "packed between varyings with explicit locations. Try "
   1584                       "using an explicit location for arrays and structs.",
   1585                       var->name);
   1586       }
   1587 
   1588       if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
   1589          for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
   1590             components[j] = 4;
   1591          components[slot_end / 4u] = (slot_end & 3) + 1;
   1592       }
   1593 
   1594       this->matches[i].generic_location = *location;
   1595 
   1596       *location = slot_end + 1;
   1597    }
   1598 
   1599    return (generic_location + 3) / 4;
   1600 }
   1601 
   1602 
   1603 /**
   1604  * Update the producer and consumer shaders to reflect the locations
   1605  * assignments that were made by varying_matches::assign_locations().
   1606  */
   1607 void
   1608 varying_matches::store_locations() const
   1609 {
   1610    for (unsigned i = 0; i < this->num_matches; i++) {
   1611       ir_variable *producer_var = this->matches[i].producer_var;
   1612       ir_variable *consumer_var = this->matches[i].consumer_var;
   1613       unsigned generic_location = this->matches[i].generic_location;
   1614       unsigned slot = generic_location / 4;
   1615       unsigned offset = generic_location % 4;
   1616 
   1617       if (producer_var) {
   1618          producer_var->data.location = VARYING_SLOT_VAR0 + slot;
   1619          producer_var->data.location_frac = offset;
   1620       }
   1621 
   1622       if (consumer_var) {
   1623          assert(consumer_var->data.location == -1);
   1624          consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
   1625          consumer_var->data.location_frac = offset;
   1626       }
   1627    }
   1628 }
   1629 
   1630 
   1631 /**
   1632  * Compute the "packing class" of the given varying.  This is an unsigned
   1633  * integer with the property that two variables in the same packing class can
   1634  * be safely backed into the same vec4.
   1635  */
   1636 unsigned
   1637 varying_matches::compute_packing_class(const ir_variable *var)
   1638 {
   1639    /* Without help from the back-end, there is no way to pack together
   1640     * variables with different interpolation types, because
   1641     * lower_packed_varyings must choose exactly one interpolation type for
   1642     * each packed varying it creates.
   1643     *
   1644     * However, we can safely pack together floats, ints, and uints, because:
   1645     *
   1646     * - varyings of base type "int" and "uint" must use the "flat"
   1647     *   interpolation type, which can only occur in GLSL 1.30 and above.
   1648     *
   1649     * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
   1650     *   can store flat floats as ints without losing any information (using
   1651     *   the ir_unop_bitcast_* opcodes).
   1652     *
   1653     * Therefore, the packing class depends only on the interpolation type.
   1654     */
   1655    unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
   1656                             (var->data.patch << 2);
   1657    packing_class *= 4;
   1658    packing_class += var->is_interpolation_flat()
   1659       ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
   1660    return packing_class;
   1661 }
   1662 
   1663 
   1664 /**
   1665  * Compute the "packing order" of the given varying.  This is a sort key we
   1666  * use to determine when to attempt to pack the given varying relative to
   1667  * other varyings in the same packing class.
   1668  */
   1669 varying_matches::packing_order_enum
   1670 varying_matches::compute_packing_order(const ir_variable *var)
   1671 {
   1672    const glsl_type *element_type = var->type;
   1673 
   1674    while (element_type->base_type == GLSL_TYPE_ARRAY) {
   1675       element_type = element_type->fields.array;
   1676    }
   1677 
   1678    switch (element_type->component_slots() % 4) {
   1679    case 1: return PACKING_ORDER_SCALAR;
   1680    case 2: return PACKING_ORDER_VEC2;
   1681    case 3: return PACKING_ORDER_VEC3;
   1682    case 0: return PACKING_ORDER_VEC4;
   1683    default:
   1684       assert(!"Unexpected value of vector_elements");
   1685       return PACKING_ORDER_VEC4;
   1686    }
   1687 }
   1688 
   1689 
   1690 /**
   1691  * Comparison function passed to qsort() to sort varyings by packing_class and
   1692  * then by packing_order.
   1693  */
   1694 int
   1695 varying_matches::match_comparator(const void *x_generic, const void *y_generic)
   1696 {
   1697    const match *x = (const match *) x_generic;
   1698    const match *y = (const match *) y_generic;
   1699 
   1700    if (x->packing_class != y->packing_class)
   1701       return x->packing_class - y->packing_class;
   1702    return x->packing_order - y->packing_order;
   1703 }
   1704 
   1705 
   1706 /**
   1707  * Comparison function passed to qsort() to sort varyings used only by
   1708  * transform feedback when packing of other varyings is disabled.
   1709  */
   1710 int
   1711 varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
   1712 {
   1713    const match *x = (const match *) x_generic;
   1714 
   1715    if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
   1716          return match_comparator(x_generic, y_generic);
   1717 
   1718    /* FIXME: When the comparator returns 0 it means the elements being
   1719     * compared are equivalent. However the qsort documentation says:
   1720     *
   1721     *    "The order of equivalent elements is undefined."
   1722     *
   1723     * In practice the sort ends up reversing the order of the varyings which
   1724     * means locations are also assigned in this reversed order and happens to
   1725     * be what we want. This is also whats happening in
   1726     * varying_matches::match_comparator().
   1727     */
   1728    return 0;
   1729 }
   1730 
   1731 
   1732 /**
   1733  * Is the given variable a varying variable to be counted against the
   1734  * limit in ctx->Const.MaxVarying?
   1735  * This includes variables such as texcoords, colors and generic
   1736  * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
   1737  */
   1738 static bool
   1739 var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
   1740 {
   1741    /* Only fragment shaders will take a varying variable as an input */
   1742    if (stage == MESA_SHADER_FRAGMENT &&
   1743        var->data.mode == ir_var_shader_in) {
   1744       switch (var->data.location) {
   1745       case VARYING_SLOT_POS:
   1746       case VARYING_SLOT_FACE:
   1747       case VARYING_SLOT_PNTC:
   1748          return false;
   1749       default:
   1750          return true;
   1751       }
   1752    }
   1753    return false;
   1754 }
   1755 
   1756 
   1757 /**
   1758  * Visitor class that generates tfeedback_candidate structs describing all
   1759  * possible targets of transform feedback.
   1760  *
   1761  * tfeedback_candidate structs are stored in the hash table
   1762  * tfeedback_candidates, which is passed to the constructor.  This hash table
   1763  * maps varying names to instances of the tfeedback_candidate struct.
   1764  */
   1765 class tfeedback_candidate_generator : public program_resource_visitor
   1766 {
   1767 public:
   1768    tfeedback_candidate_generator(void *mem_ctx,
   1769                                  hash_table *tfeedback_candidates)
   1770       : mem_ctx(mem_ctx),
   1771         tfeedback_candidates(tfeedback_candidates),
   1772         toplevel_var(NULL),
   1773         varying_floats(0)
   1774    {
   1775    }
   1776 
   1777    void process(ir_variable *var)
   1778    {
   1779       /* All named varying interface blocks should be flattened by now */
   1780       assert(!var->is_interface_instance());
   1781 
   1782       this->toplevel_var = var;
   1783       this->varying_floats = 0;
   1784       program_resource_visitor::process(var);
   1785    }
   1786 
   1787 private:
   1788    virtual void visit_field(const glsl_type *type, const char *name,
   1789                             bool /* row_major */,
   1790                             const glsl_type * /* record_type */,
   1791                             const enum glsl_interface_packing,
   1792                             bool /* last_field */)
   1793    {
   1794       assert(!type->without_array()->is_record());
   1795       assert(!type->without_array()->is_interface());
   1796 
   1797       tfeedback_candidate *candidate
   1798          = rzalloc(this->mem_ctx, tfeedback_candidate);
   1799       candidate->toplevel_var = this->toplevel_var;
   1800       candidate->type = type;
   1801       candidate->offset = this->varying_floats;
   1802       _mesa_hash_table_insert(this->tfeedback_candidates,
   1803                               ralloc_strdup(this->mem_ctx, name),
   1804                               candidate);
   1805       this->varying_floats += type->component_slots();
   1806    }
   1807 
   1808    /**
   1809     * Memory context used to allocate hash table keys and values.
   1810     */
   1811    void * const mem_ctx;
   1812 
   1813    /**
   1814     * Hash table in which tfeedback_candidate objects should be stored.
   1815     */
   1816    hash_table * const tfeedback_candidates;
   1817 
   1818    /**
   1819     * Pointer to the toplevel variable that is being traversed.
   1820     */
   1821    ir_variable *toplevel_var;
   1822 
   1823    /**
   1824     * Total number of varying floats that have been visited so far.  This is
   1825     * used to determine the offset to each varying within the toplevel
   1826     * variable.
   1827     */
   1828    unsigned varying_floats;
   1829 };
   1830 
   1831 
   1832 namespace linker {
   1833 
   1834 void
   1835 populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
   1836                              hash_table *consumer_inputs,
   1837                              hash_table *consumer_interface_inputs,
   1838                              ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
   1839 {
   1840    memset(consumer_inputs_with_locations,
   1841           0,
   1842           sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
   1843 
   1844    foreach_in_list(ir_instruction, node, ir) {
   1845       ir_variable *const input_var = node->as_variable();
   1846 
   1847       if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
   1848          /* All interface blocks should have been lowered by this point */
   1849          assert(!input_var->type->is_interface());
   1850 
   1851          if (input_var->data.explicit_location) {
   1852             /* assign_varying_locations only cares about finding the
   1853              * ir_variable at the start of a contiguous location block.
   1854              *
   1855              *     - For !producer, consumer_inputs_with_locations isn't used.
   1856              *
   1857              *     - For !consumer, consumer_inputs_with_locations is empty.
   1858              *
   1859              * For consumer && producer, if you were trying to set some
   1860              * ir_variable to the middle of a location block on the other side
   1861              * of producer/consumer, cross_validate_outputs_to_inputs() should
   1862              * be link-erroring due to either type mismatch or location
   1863              * overlaps.  If the variables do match up, then they've got a
   1864              * matching data.location and you only looked at
   1865              * consumer_inputs_with_locations[var->data.location], not any
   1866              * following entries for the array/structure.
   1867              */
   1868             consumer_inputs_with_locations[input_var->data.location] =
   1869                input_var;
   1870          } else if (input_var->get_interface_type() != NULL) {
   1871             char *const iface_field_name =
   1872                ralloc_asprintf(mem_ctx, "%s.%s",
   1873                   input_var->get_interface_type()->without_array()->name,
   1874                   input_var->name);
   1875             _mesa_hash_table_insert(consumer_interface_inputs,
   1876                                     iface_field_name, input_var);
   1877          } else {
   1878             _mesa_hash_table_insert(consumer_inputs,
   1879                                     ralloc_strdup(mem_ctx, input_var->name),
   1880                                     input_var);
   1881          }
   1882       }
   1883    }
   1884 }
   1885 
   1886 /**
   1887  * Find a variable from the consumer that "matches" the specified variable
   1888  *
   1889  * This function only finds inputs with names that match.  There is no
   1890  * validation (here) that the types, etc. are compatible.
   1891  */
   1892 ir_variable *
   1893 get_matching_input(void *mem_ctx,
   1894                    const ir_variable *output_var,
   1895                    hash_table *consumer_inputs,
   1896                    hash_table *consumer_interface_inputs,
   1897                    ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
   1898 {
   1899    ir_variable *input_var;
   1900 
   1901    if (output_var->data.explicit_location) {
   1902       input_var = consumer_inputs_with_locations[output_var->data.location];
   1903    } else if (output_var->get_interface_type() != NULL) {
   1904       char *const iface_field_name =
   1905          ralloc_asprintf(mem_ctx, "%s.%s",
   1906             output_var->get_interface_type()->without_array()->name,
   1907             output_var->name);
   1908       hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
   1909       input_var = entry ? (ir_variable *) entry->data : NULL;
   1910    } else {
   1911       hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
   1912       input_var = entry ? (ir_variable *) entry->data : NULL;
   1913    }
   1914 
   1915    return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
   1916       ? NULL : input_var;
   1917 }
   1918 
   1919 }
   1920 
   1921 static int
   1922 io_variable_cmp(const void *_a, const void *_b)
   1923 {
   1924    const ir_variable *const a = *(const ir_variable **) _a;
   1925    const ir_variable *const b = *(const ir_variable **) _b;
   1926 
   1927    if (a->data.explicit_location && b->data.explicit_location)
   1928       return b->data.location - a->data.location;
   1929 
   1930    if (a->data.explicit_location && !b->data.explicit_location)
   1931       return 1;
   1932 
   1933    if (!a->data.explicit_location && b->data.explicit_location)
   1934       return -1;
   1935 
   1936    return -strcmp(a->name, b->name);
   1937 }
   1938 
   1939 /**
   1940  * Sort the shader IO variables into canonical order
   1941  */
   1942 static void
   1943 canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
   1944 {
   1945    ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
   1946    unsigned num_variables = 0;
   1947 
   1948    foreach_in_list(ir_instruction, node, ir) {
   1949       ir_variable *const var = node->as_variable();
   1950 
   1951       if (var == NULL || var->data.mode != io_mode)
   1952          continue;
   1953 
   1954       /* If we have already encountered more I/O variables that could
   1955        * successfully link, bail.
   1956        */
   1957       if (num_variables == ARRAY_SIZE(var_table))
   1958          return;
   1959 
   1960       var_table[num_variables++] = var;
   1961    }
   1962 
   1963    if (num_variables == 0)
   1964       return;
   1965 
   1966    /* Sort the list in reverse order (io_variable_cmp handles this).  Later
   1967     * we're going to push the variables on to the IR list as a stack, so we
   1968     * want the last variable (in canonical order) to be first in the list.
   1969     */
   1970    qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
   1971 
   1972    /* Remove the variable from it's current location in the IR, and put it at
   1973     * the front.
   1974     */
   1975    for (unsigned i = 0; i < num_variables; i++) {
   1976       var_table[i]->remove();
   1977       ir->push_head(var_table[i]);
   1978    }
   1979 }
   1980 
   1981 /**
   1982  * Generate a bitfield map of the explicit locations for shader varyings.
   1983  *
   1984  * Note: For Tessellation shaders we are sitting right on the limits of the
   1985  * 64 bit map. Per-vertex and per-patch both have separate location domains
   1986  * with a max of MAX_VARYING.
   1987  */
   1988 uint64_t
   1989 reserved_varying_slot(struct gl_linked_shader *stage,
   1990                       ir_variable_mode io_mode)
   1991 {
   1992    assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
   1993    /* Avoid an overflow of the returned value */
   1994    assert(MAX_VARYINGS_INCL_PATCH <= 64);
   1995 
   1996    uint64_t slots = 0;
   1997    int var_slot;
   1998 
   1999    if (!stage)
   2000       return slots;
   2001 
   2002    foreach_in_list(ir_instruction, node, stage->ir) {
   2003       ir_variable *const var = node->as_variable();
   2004 
   2005       if (var == NULL || var->data.mode != io_mode ||
   2006           !var->data.explicit_location ||
   2007           var->data.location < VARYING_SLOT_VAR0)
   2008          continue;
   2009 
   2010       var_slot = var->data.location - VARYING_SLOT_VAR0;
   2011 
   2012       unsigned num_elements = get_varying_type(var, stage->Stage)
   2013          ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
   2014       for (unsigned i = 0; i < num_elements; i++) {
   2015          if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
   2016             slots |= UINT64_C(1) << var_slot;
   2017          var_slot += 1;
   2018       }
   2019    }
   2020 
   2021    return slots;
   2022 }
   2023 
   2024 
   2025 /**
   2026  * Assign locations for all variables that are produced in one pipeline stage
   2027  * (the "producer") and consumed in the next stage (the "consumer").
   2028  *
   2029  * Variables produced by the producer may also be consumed by transform
   2030  * feedback.
   2031  *
   2032  * \param num_tfeedback_decls is the number of declarations indicating
   2033  *        variables that may be consumed by transform feedback.
   2034  *
   2035  * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
   2036  *        representing the result of parsing the strings passed to
   2037  *        glTransformFeedbackVaryings().  assign_location() will be called for
   2038  *        each of these objects that matches one of the outputs of the
   2039  *        producer.
   2040  *
   2041  * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
   2042  * be NULL.  In this case, varying locations are assigned solely based on the
   2043  * requirements of transform feedback.
   2044  */
   2045 bool
   2046 assign_varying_locations(struct gl_context *ctx,
   2047                          void *mem_ctx,
   2048                          struct gl_shader_program *prog,
   2049                          gl_linked_shader *producer,
   2050                          gl_linked_shader *consumer,
   2051                          unsigned num_tfeedback_decls,
   2052                          tfeedback_decl *tfeedback_decls,
   2053                          const uint64_t reserved_slots)
   2054 {
   2055    /* Tessellation shaders treat inputs and outputs as shared memory and can
   2056     * access inputs and outputs of other invocations.
   2057     * Therefore, they can't be lowered to temps easily (and definitely not
   2058     * efficiently).
   2059     */
   2060    bool unpackable_tess =
   2061       (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
   2062       (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
   2063       (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
   2064 
   2065    /* Transform feedback code assumes varying arrays are packed, so if the
   2066     * driver has disabled varying packing, make sure to at least enable
   2067     * packing required by transform feedback.
   2068     */
   2069    bool xfb_enabled =
   2070       ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
   2071 
   2072    /* Disable packing on outward facing interfaces for SSO because in ES we
   2073     * need to retain the unpacked varying information for draw time
   2074     * validation.
   2075     *
   2076     * Packing is still enabled on individual arrays, structs, and matrices as
   2077     * these are required by the transform feedback code and it is still safe
   2078     * to do so. We also enable packing when a varying is only used for
   2079     * transform feedback and its not a SSO.
   2080     */
   2081    bool disable_varying_packing =
   2082       ctx->Const.DisableVaryingPacking || unpackable_tess;
   2083    if (prog->SeparateShader && (producer == NULL || consumer == NULL))
   2084       disable_varying_packing = true;
   2085 
   2086    varying_matches matches(disable_varying_packing, xfb_enabled,
   2087                            producer ? producer->Stage : (gl_shader_stage)-1,
   2088                            consumer ? consumer->Stage : (gl_shader_stage)-1);
   2089    hash_table *tfeedback_candidates =
   2090          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
   2091                                  _mesa_key_string_equal);
   2092    hash_table *consumer_inputs =
   2093          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
   2094                                  _mesa_key_string_equal);
   2095    hash_table *consumer_interface_inputs =
   2096          _mesa_hash_table_create(NULL, _mesa_key_hash_string,
   2097                                  _mesa_key_string_equal);
   2098    ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
   2099       NULL,
   2100    };
   2101 
   2102    unsigned consumer_vertices = 0;
   2103    if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
   2104       consumer_vertices = prog->Geom.VerticesIn;
   2105 
   2106    /* Operate in a total of four passes.
   2107     *
   2108     * 1. Sort inputs / outputs into a canonical order.  This is necessary so
   2109     *    that inputs / outputs of separable shaders will be assigned
   2110     *    predictable locations regardless of the order in which declarations
   2111     *    appeared in the shader source.
   2112     *
   2113     * 2. Assign locations for any matching inputs and outputs.
   2114     *
   2115     * 3. Mark output variables in the producer that do not have locations as
   2116     *    not being outputs.  This lets the optimizer eliminate them.
   2117     *
   2118     * 4. Mark input variables in the consumer that do not have locations as
   2119     *    not being inputs.  This lets the optimizer eliminate them.
   2120     */
   2121    if (consumer)
   2122       canonicalize_shader_io(consumer->ir, ir_var_shader_in);
   2123 
   2124    if (producer)
   2125       canonicalize_shader_io(producer->ir, ir_var_shader_out);
   2126 
   2127    if (consumer)
   2128       linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
   2129                                            consumer_inputs,
   2130                                            consumer_interface_inputs,
   2131                                            consumer_inputs_with_locations);
   2132 
   2133    if (producer) {
   2134       foreach_in_list(ir_instruction, node, producer->ir) {
   2135          ir_variable *const output_var = node->as_variable();
   2136 
   2137          if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
   2138             continue;
   2139 
   2140          /* Only geometry shaders can use non-zero streams */
   2141          assert(output_var->data.stream == 0 ||
   2142                 (output_var->data.stream < MAX_VERTEX_STREAMS &&
   2143                  producer->Stage == MESA_SHADER_GEOMETRY));
   2144 
   2145          if (num_tfeedback_decls > 0) {
   2146             tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
   2147             g.process(output_var);
   2148          }
   2149 
   2150          ir_variable *const input_var =
   2151             linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
   2152                                        consumer_interface_inputs,
   2153                                        consumer_inputs_with_locations);
   2154 
   2155          /* If a matching input variable was found, add this output (and the
   2156           * input) to the set.  If this is a separable program and there is no
   2157           * consumer stage, add the output.
   2158           *
   2159           * Always add TCS outputs. They are shared by all invocations
   2160           * within a patch and can be used as shared memory.
   2161           */
   2162          if (input_var || (prog->SeparateShader && consumer == NULL) ||
   2163              producer->Stage == MESA_SHADER_TESS_CTRL) {
   2164             matches.record(output_var, input_var);
   2165          }
   2166 
   2167          /* Only stream 0 outputs can be consumed in the next stage */
   2168          if (input_var && output_var->data.stream != 0) {
   2169             linker_error(prog, "output %s is assigned to stream=%d but "
   2170                          "is linked to an input, which requires stream=0",
   2171                          output_var->name, output_var->data.stream);
   2172             return false;
   2173          }
   2174       }
   2175    } else {
   2176       /* If there's no producer stage, then this must be a separable program.
   2177        * For example, we may have a program that has just a fragment shader.
   2178        * Later this program will be used with some arbitrary vertex (or
   2179        * geometry) shader program.  This means that locations must be assigned
   2180        * for all the inputs.
   2181        */
   2182       foreach_in_list(ir_instruction, node, consumer->ir) {
   2183          ir_variable *const input_var = node->as_variable();
   2184 
   2185          if (input_var == NULL || input_var->data.mode != ir_var_shader_in)
   2186             continue;
   2187 
   2188          matches.record(NULL, input_var);
   2189       }
   2190    }
   2191 
   2192    _mesa_hash_table_destroy(consumer_inputs, NULL);
   2193    _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
   2194 
   2195    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
   2196       if (!tfeedback_decls[i].is_varying())
   2197          continue;
   2198 
   2199       const tfeedback_candidate *matched_candidate
   2200          = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
   2201 
   2202       if (matched_candidate == NULL) {
   2203          _mesa_hash_table_destroy(tfeedback_candidates, NULL);
   2204          return false;
   2205       }
   2206 
   2207       if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
   2208          matched_candidate->toplevel_var->data.is_xfb_only = 1;
   2209          matches.record(matched_candidate->toplevel_var, NULL);
   2210       }
   2211    }
   2212 
   2213    uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
   2214    const unsigned slots_used = matches.assign_locations(
   2215          prog, components, reserved_slots);
   2216    matches.store_locations();
   2217 
   2218    for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
   2219       if (!tfeedback_decls[i].is_varying())
   2220          continue;
   2221 
   2222       if (!tfeedback_decls[i].assign_location(ctx, prog)) {
   2223          _mesa_hash_table_destroy(tfeedback_candidates, NULL);
   2224          return false;
   2225       }
   2226    }
   2227    _mesa_hash_table_destroy(tfeedback_candidates, NULL);
   2228 
   2229    if (consumer && producer) {
   2230       foreach_in_list(ir_instruction, node, consumer->ir) {
   2231          ir_variable *const var = node->as_variable();
   2232 
   2233          if (var && var->data.mode == ir_var_shader_in &&
   2234              var->data.is_unmatched_generic_inout) {
   2235             if (!prog->IsES && prog->data->Version <= 120) {
   2236                /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
   2237                 *
   2238                 *     Only those varying variables used (i.e. read) in
   2239                 *     the fragment shader executable must be written to
   2240                 *     by the vertex shader executable; declaring
   2241                 *     superfluous varying variables in a vertex shader is
   2242                 *     permissible.
   2243                 *
   2244                 * We interpret this text as meaning that the VS must
   2245                 * write the variable for the FS to read it.  See
   2246                 * "glsl1-varying read but not written" in piglit.
   2247                 */
   2248                linker_error(prog, "%s shader varying %s not written "
   2249                             "by %s shader\n.",
   2250                             _mesa_shader_stage_to_string(consumer->Stage),
   2251                             var->name,
   2252                             _mesa_shader_stage_to_string(producer->Stage));
   2253             } else {
   2254                linker_warning(prog, "%s shader varying %s not written "
   2255                               "by %s shader\n.",
   2256                               _mesa_shader_stage_to_string(consumer->Stage),
   2257                               var->name,
   2258                               _mesa_shader_stage_to_string(producer->Stage));
   2259             }
   2260          }
   2261       }
   2262 
   2263       /* Now that validation is done its safe to remove unused varyings. As
   2264        * we have both a producer and consumer its safe to remove unused
   2265        * varyings even if the program is a SSO because the stages are being
   2266        * linked together i.e. we have a multi-stage SSO.
   2267        */
   2268       remove_unused_shader_inputs_and_outputs(false, producer,
   2269                                               ir_var_shader_out);
   2270       remove_unused_shader_inputs_and_outputs(false, consumer,
   2271                                               ir_var_shader_in);
   2272    }
   2273 
   2274    if (producer) {
   2275       lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
   2276                             0, producer, disable_varying_packing,
   2277                             xfb_enabled);
   2278    }
   2279 
   2280    if (consumer) {
   2281       lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
   2282                             consumer_vertices, consumer,
   2283                             disable_varying_packing, xfb_enabled);
   2284    }
   2285 
   2286    return true;
   2287 }
   2288 
   2289 bool
   2290 check_against_output_limit(struct gl_context *ctx,
   2291                            struct gl_shader_program *prog,
   2292                            gl_linked_shader *producer,
   2293                            unsigned num_explicit_locations)
   2294 {
   2295    unsigned output_vectors = num_explicit_locations;
   2296 
   2297    foreach_in_list(ir_instruction, node, producer->ir) {
   2298       ir_variable *const var = node->as_variable();
   2299 
   2300       if (var && !var->data.explicit_location &&
   2301           var->data.mode == ir_var_shader_out &&
   2302           var_counts_against_varying_limit(producer->Stage, var)) {
   2303          /* outputs for fragment shader can't be doubles */
   2304          output_vectors += var->type->count_attribute_slots(false);
   2305       }
   2306    }
   2307 
   2308    assert(producer->Stage != MESA_SHADER_FRAGMENT);
   2309    unsigned max_output_components =
   2310       ctx->Const.Program[producer->Stage].MaxOutputComponents;
   2311 
   2312    const unsigned output_components = output_vectors * 4;
   2313    if (output_components > max_output_components) {
   2314       if (ctx->API == API_OPENGLES2 || prog->IsES)
   2315          linker_error(prog, "%s shader uses too many output vectors "
   2316                       "(%u > %u)\n",
   2317                       _mesa_shader_stage_to_string(producer->Stage),
   2318                       output_vectors,
   2319                       max_output_components / 4);
   2320       else
   2321          linker_error(prog, "%s shader uses too many output components "
   2322                       "(%u > %u)\n",
   2323                       _mesa_shader_stage_to_string(producer->Stage),
   2324                       output_components,
   2325                       max_output_components);
   2326 
   2327       return false;
   2328    }
   2329 
   2330    return true;
   2331 }
   2332 
   2333 bool
   2334 check_against_input_limit(struct gl_context *ctx,
   2335                           struct gl_shader_program *prog,
   2336                           gl_linked_shader *consumer,
   2337                           unsigned num_explicit_locations)
   2338 {
   2339    unsigned input_vectors = num_explicit_locations;
   2340 
   2341    foreach_in_list(ir_instruction, node, consumer->ir) {
   2342       ir_variable *const var = node->as_variable();
   2343 
   2344       if (var && !var->data.explicit_location &&
   2345           var->data.mode == ir_var_shader_in &&
   2346           var_counts_against_varying_limit(consumer->Stage, var)) {
   2347          /* vertex inputs aren't varying counted */
   2348          input_vectors += var->type->count_attribute_slots(false);
   2349       }
   2350    }
   2351 
   2352    assert(consumer->Stage != MESA_SHADER_VERTEX);
   2353    unsigned max_input_components =
   2354       ctx->Const.Program[consumer->Stage].MaxInputComponents;
   2355 
   2356    const unsigned input_components = input_vectors * 4;
   2357    if (input_components > max_input_components) {
   2358       if (ctx->API == API_OPENGLES2 || prog->IsES)
   2359          linker_error(prog, "%s shader uses too many input vectors "
   2360                       "(%u > %u)\n",
   2361                       _mesa_shader_stage_to_string(consumer->Stage),
   2362                       input_vectors,
   2363                       max_input_components / 4);
   2364       else
   2365          linker_error(prog, "%s shader uses too many input components "
   2366                       "(%u > %u)\n",
   2367                       _mesa_shader_stage_to_string(consumer->Stage),
   2368                       input_components,
   2369                       max_input_components);
   2370 
   2371       return false;
   2372    }
   2373 
   2374    return true;
   2375 }
   2376