Home | History | Annotate | Download | only in hlsl
      1 //
      2 // Copyright (C) 2017-2018 Google, Inc.
      3 // Copyright (C) 2017 LunarG, Inc.
      4 //
      5 // All rights reserved.
      6 //
      7 // Redistribution and use in source and binary forms, with or without
      8 // modification, are permitted provided that the following conditions
      9 // are met:
     10 //
     11 //    Redistributions of source code must retain the above copyright
     12 //    notice, this list of conditions and the following disclaimer.
     13 //
     14 //    Redistributions in binary form must reproduce the above
     15 //    copyright notice, this list of conditions and the following
     16 //    disclaimer in the documentation and/or other materials provided
     17 //    with the distribution.
     18 //
     19 //    Neither the name of 3Dlabs Inc. Ltd. nor the names of its
     20 //    contributors may be used to endorse or promote products derived
     21 //    from this software without specific prior written permission.
     22 //
     23 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     24 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     25 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     26 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     27 // COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     28 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     29 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     30 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     31 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     33 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     34 // POSSIBILITY OF SUCH DAMAGE.
     35 //
     36 
     37 #include "hlslParseHelper.h"
     38 #include "hlslScanContext.h"
     39 #include "hlslGrammar.h"
     40 #include "hlslAttributes.h"
     41 
     42 #include "../glslang/MachineIndependent/Scan.h"
     43 #include "../glslang/MachineIndependent/preprocessor/PpContext.h"
     44 
     45 #include "../glslang/OSDependent/osinclude.h"
     46 
     47 #include <algorithm>
     48 #include <functional>
     49 #include <cctype>
     50 #include <array>
     51 #include <set>
     52 
     53 namespace glslang {
     54 
     55 HlslParseContext::HlslParseContext(TSymbolTable& symbolTable, TIntermediate& interm, bool parsingBuiltins,
     56                                    int version, EProfile profile, const SpvVersion& spvVersion, EShLanguage language,
     57                                    TInfoSink& infoSink,
     58                                    const TString sourceEntryPointName,
     59                                    bool forwardCompatible, EShMessages messages) :
     60     TParseContextBase(symbolTable, interm, parsingBuiltins, version, profile, spvVersion, language, infoSink,
     61                       forwardCompatible, messages, &sourceEntryPointName),
     62     annotationNestingLevel(0),
     63     inputPatch(nullptr),
     64     nextInLocation(0), nextOutLocation(0),
     65     entryPointFunction(nullptr),
     66     entryPointFunctionBody(nullptr),
     67     gsStreamOutput(nullptr),
     68     clipDistanceOutput(nullptr),
     69     cullDistanceOutput(nullptr),
     70     clipDistanceInput(nullptr),
     71     cullDistanceInput(nullptr)
     72 {
     73     globalUniformDefaults.clear();
     74     globalUniformDefaults.layoutMatrix = ElmRowMajor;
     75     globalUniformDefaults.layoutPacking = ElpStd140;
     76 
     77     globalBufferDefaults.clear();
     78     globalBufferDefaults.layoutMatrix = ElmRowMajor;
     79     globalBufferDefaults.layoutPacking = ElpStd430;
     80 
     81     globalInputDefaults.clear();
     82     globalOutputDefaults.clear();
     83 
     84     clipSemanticNSizeIn.fill(0);
     85     cullSemanticNSizeIn.fill(0);
     86     clipSemanticNSizeOut.fill(0);
     87     cullSemanticNSizeOut.fill(0);
     88 
     89     // "Shaders in the transform
     90     // feedback capturing mode have an initial global default of
     91     //     layout(xfb_buffer = 0) out;"
     92     if (language == EShLangVertex ||
     93         language == EShLangTessControl ||
     94         language == EShLangTessEvaluation ||
     95         language == EShLangGeometry)
     96         globalOutputDefaults.layoutXfbBuffer = 0;
     97 
     98     if (language == EShLangGeometry)
     99         globalOutputDefaults.layoutStream = 0;
    100 }
    101 
    102 HlslParseContext::~HlslParseContext()
    103 {
    104 }
    105 
    106 void HlslParseContext::initializeExtensionBehavior()
    107 {
    108     TParseContextBase::initializeExtensionBehavior();
    109 
    110     // HLSL allows #line by default.
    111     extensionBehavior[E_GL_GOOGLE_cpp_style_line_directive] = EBhEnable;
    112 }
    113 
// Record the built-in resource limits 'r' in 'resources' and pass that stored
// copy on to the intermediate representation.
void HlslParseContext::setLimits(const TBuiltInResource& r)
{
    resources = r;
    // Note: the stored copy, not 'r' itself, is what gets handed to the intermediate.
    intermediate.setLimits(resources);
}
    119 
    120 //
    121 // Parse an array of strings using the parser in HlslRules.
    122 //
    123 // Returns true for successful acceptance of the shader, false if any errors.
    124 //
    125 bool HlslParseContext::parseShaderStrings(TPpContext& ppContext, TInputScanner& input, bool versionWillBeError)
    126 {
    127     currentScanner = &input;
    128     ppContext.setInput(input, versionWillBeError);
    129 
    130     HlslScanContext scanContext(*this, ppContext);
    131     HlslGrammar grammar(scanContext, *this);
    132     if (!grammar.parse()) {
    133         // Print a message formated such that if you click on the message it will take you right to
    134         // the line through most UIs.
    135         const glslang::TSourceLoc& sourceLoc = input.getSourceLoc();
    136         infoSink.info << sourceLoc.name->c_str() << "(" << sourceLoc.line << "): error at column " << sourceLoc.column
    137                       << ", HLSL parsing failed.\n";
    138         ++numErrors;
    139         return false;
    140     }
    141 
    142     finish();
    143 
    144     return numErrors == 0;
    145 }
    146 
    147 //
    148 // Return true if this l-value node should be converted in some manner.
    149 // For instance: turning a load aggregate into a store in an l-value.
    150 //
    151 bool HlslParseContext::shouldConvertLValue(const TIntermNode* node) const
    152 {
    153     if (node == nullptr || node->getAsTyped() == nullptr)
    154         return false;
    155 
    156     const TIntermAggregate* lhsAsAggregate = node->getAsAggregate();
    157     const TIntermBinary* lhsAsBinary = node->getAsBinaryNode();
    158 
    159     // If it's a swizzled/indexed aggregate, look at the left node instead.
    160     if (lhsAsBinary != nullptr &&
    161         (lhsAsBinary->getOp() == EOpVectorSwizzle || lhsAsBinary->getOp() == EOpIndexDirect))
    162         lhsAsAggregate = lhsAsBinary->getLeft()->getAsAggregate();
    163     if (lhsAsAggregate != nullptr && lhsAsAggregate->getOp() == EOpImageLoad)
    164         return true;
    165 
    166     return false;
    167 }
    168 
    169 void HlslParseContext::growGlobalUniformBlock(const TSourceLoc& loc, TType& memberType, const TString& memberName,
    170                                               TTypeList* newTypeList)
    171 {
    172     newTypeList = nullptr;
    173     correctUniform(memberType.getQualifier());
    174     if (memberType.isStruct()) {
    175         auto it = ioTypeMap.find(memberType.getStruct());
    176         if (it != ioTypeMap.end() && it->second.uniform)
    177             newTypeList = it->second.uniform;
    178     }
    179     TParseContextBase::growGlobalUniformBlock(loc, memberType, memberName, newTypeList);
    180 }
    181 
    182 //
    183 // Return a TLayoutFormat corresponding to the given texture type.
    184 //
    185 TLayoutFormat HlslParseContext::getLayoutFromTxType(const TSourceLoc& loc, const TType& txType)
    186 {
    187     if (txType.isStruct()) {
    188         // TODO: implement.
    189         error(loc, "unimplemented: structure type in image or buffer", "", "");
    190         return ElfNone;
    191     }
    192 
    193     const int components = txType.getVectorSize();
    194     const TBasicType txBasicType = txType.getBasicType();
    195 
    196     const auto selectFormat = [this,&components](TLayoutFormat v1, TLayoutFormat v2, TLayoutFormat v4) -> TLayoutFormat {
    197         if (intermediate.getNoStorageFormat())
    198             return ElfNone;
    199 
    200         return components == 1 ? v1 :
    201                components == 2 ? v2 : v4;
    202     };
    203 
    204     switch (txBasicType) {
    205     case EbtFloat: return selectFormat(ElfR32f,  ElfRg32f,  ElfRgba32f);
    206     case EbtInt:   return selectFormat(ElfR32i,  ElfRg32i,  ElfRgba32i);
    207     case EbtUint:  return selectFormat(ElfR32ui, ElfRg32ui, ElfRgba32ui);
    208     default:
    209         error(loc, "unknown basic type in image format", "", "");
    210         return ElfNone;
    211     }
    212 }
    213 
    214 //
    215 // Both test and if necessary, spit out an error, to see if the node is really
    216 // an l-value that can be operated on this way.
    217 //
    218 // Returns true if there was an error.
    219 //
    220 bool HlslParseContext::lValueErrorCheck(const TSourceLoc& loc, const char* op, TIntermTyped* node)
    221 {
    222     if (shouldConvertLValue(node)) {
    223         // if we're writing to a texture, it must be an RW form.
    224 
    225         TIntermAggregate* lhsAsAggregate = node->getAsAggregate();
    226         TIntermTyped* object = lhsAsAggregate->getSequence()[0]->getAsTyped();
    227 
    228         if (!object->getType().getSampler().isImage()) {
    229             error(loc, "operator[] on a non-RW texture must be an r-value", "", "");
    230             return true;
    231         }
    232     }
    233 
    234     // We tolerate samplers as l-values, even though they are nominally
    235     // illegal, because we expect a later optimization to eliminate them.
    236     if (node->getType().getBasicType() == EbtSampler) {
    237         intermediate.setNeedsLegalization();
    238         return false;
    239     }
    240 
    241     // Let the base class check errors
    242     return TParseContextBase::lValueErrorCheck(loc, op, node);
    243 }
    244 
    245 //
//
// This function handles l-value conversions and verifications.  It uses, but is not synonymous
// with lValueErrorCheck.  That function accepts an l-value directly, while this one must be
// given the surrounding tree - e.g., with an assignment, so we can convert the assign into a
// series of other image operations.
//
// Most things are passed through unmodified, except for error checking.
//
// 'node' is the unary or binary operation whose operand/left side is the l-value.
// Returns:
//  - nullptr if 'node' is nullptr or the l-value check reports an error,
//  - 'node' itself when no conversion applies,
//  - otherwise a new EOpSequence aggregate that performs the image store (and, for
//    read-modify-write forms, the preceding load) and evaluates to a temporary.
//
TIntermTyped* HlslParseContext::handleLvalue(const TSourceLoc& loc, const char* op, TIntermTyped*& node)
{
    if (node == nullptr)
        return nullptr;

    TIntermBinary* nodeAsBinary = node->getAsBinaryNode();
    TIntermUnary* nodeAsUnary = node->getAsUnaryNode();
    // Accumulates the replacement statements; every helper lambda below appends to it.
    TIntermAggregate* sequence = nullptr;

    // The l-value is the operand of a unary op or the left side of a binary op.
    TIntermTyped* lhs = nodeAsUnary  ? nodeAsUnary->getOperand() :
                        nodeAsBinary ? nodeAsBinary->getLeft() :
                        nullptr;

    // Early bail out if there is no conversion to apply
    if (!shouldConvertLValue(lhs)) {
        if (lhs != nullptr)
            if (lValueErrorCheck(loc, op, lhs))
                return nullptr;
        return node;
    }

    // *** If we get here, we're going to apply some conversion to an l-value.
    // (shouldConvertLValue() returns false for nullptr, so 'lhs' is non-null from here on.)

    // Helper to create a load.
    // Appends "rhsTmp = OpImageLoad(object, coord)"; 'coord' must be a symbol node,
    // since a fresh symbol reference is made from it.
    const auto makeLoad = [&](TIntermSymbol* rhsTmp, TIntermTyped* object, TIntermTyped* coord, const TType& derefType) {
        TIntermAggregate* loadOp = new TIntermAggregate(EOpImageLoad);
        loadOp->setLoc(loc);
        loadOp->getSequence().push_back(object);
        loadOp->getSequence().push_back(intermediate.addSymbol(*coord->getAsSymbolNode()));
        loadOp->setType(derefType);

        sequence = intermediate.growAggregate(sequence,
                                              intermediate.addAssign(EOpAssign, rhsTmp, loadOp, loc),
                                              loc);
    };

    // Helper to create a store.
    // Appends "OpImageStore(object, coord, rhsTmp)", typed void.
    const auto makeStore = [&](TIntermTyped* object, TIntermTyped* coord, TIntermSymbol* rhsTmp) {
        TIntermAggregate* storeOp = new TIntermAggregate(EOpImageStore);
        storeOp->getSequence().push_back(object);
        storeOp->getSequence().push_back(coord);
        storeOp->getSequence().push_back(intermediate.addSymbol(*rhsTmp));
        storeOp->setLoc(loc);
        storeOp->setType(TType(EbtVoid));

        sequence = intermediate.growAggregate(sequence, storeOp);
    };

    // Helper to create an assign.
    // Appends the binary node "lhs op rhs", typed as 'lhs'.  Note the parameter names
    // shadow the enclosing function's 'op' and 'lhs'.
    const auto makeBinary = [&](TOperator op, TIntermTyped* lhs, TIntermTyped* rhs) {
        sequence = intermediate.growAggregate(sequence,
                                              intermediate.addBinaryNode(op, lhs, rhs, loc, lhs->getType()),
                                              loc);
    };

    // Helper to complete sequence by adding trailing variable, so we evaluate to the right value.
    const auto finishSequence = [&](TIntermSymbol* rhsTmp, const TType& derefType) -> TIntermAggregate* {
        // Add a trailing use of the temp, so the sequence returns the proper value.
        sequence = intermediate.growAggregate(sequence, intermediate.addSymbol(*rhsTmp));
        sequence->setOperator(EOpSequence);
        sequence->setLoc(loc);
        sequence->setType(derefType);

        return sequence;
    };

    // Helper to add unary op
    // Appends "op rhsTmp", typed as 'rhsTmp'.
    const auto makeUnary = [&](TOperator op, TIntermSymbol* rhsTmp) {
        sequence = intermediate.growAggregate(sequence,
                                              intermediate.addUnaryNode(op, intermediate.addSymbol(*rhsTmp), loc,
                                                                        rhsTmp->getType()),
                                              loc);
    };

    // Return true if swizzle or index writes all components of the given variable.
    const auto writesAllComponents = [&](TIntermSymbol* var, TIntermBinary* swizzle) -> bool {
        if (swizzle == nullptr)  // not a swizzle or index
            return true;

        // Track which components are being set.
        std::array<bool, 4> compIsSet;
        compIsSet.fill(false);

        const TIntermConstantUnion* asConst     = swizzle->getRight()->getAsConstantUnion();
        const TIntermAggregate*     asAggregate = swizzle->getRight()->getAsAggregate();

        // This could be either a direct index, or a swizzle.
        // NOTE(review): the constant values are used directly as indexes into a 4-element
        // array; this presumes they were range-checked before reaching here - confirm.
        if (asConst) {
            compIsSet[asConst->getConstArray()[0].getIConst()] = true;
        } else if (asAggregate) {
            const TIntermSequence& seq = asAggregate->getSequence();
            for (int comp=0; comp<int(seq.size()); ++comp)
                compIsSet[seq[comp]->getAsConstantUnion()->getConstArray()[0].getIConst()] = true;
        } else {
            assert(0);
        }

        // Return true if all components are being set by the index or swizzle
        return std::all_of(compIsSet.begin(), compIsSet.begin() + var->getType().getVectorSize(),
                           [](bool isSet) { return isSet; } );
    };

    // Create swizzle matching input swizzle
    // (returns 'var' unchanged when there is no swizzle to replicate).
    const auto addSwizzle = [&](TIntermSymbol* var, TIntermBinary* swizzle) -> TIntermTyped* {
        if (swizzle)
            return intermediate.addBinaryNode(swizzle->getOp(), var, swizzle->getRight(), loc, swizzle->getType());
        else
            return var;
    };

    TIntermBinary*    lhsAsBinary    = lhs->getAsBinaryNode();
    TIntermAggregate* lhsAsAggregate = lhs->getAsAggregate();
    bool lhsIsSwizzle = false;

    // If it's a swizzled L-value, remember the swizzle, and use the LHS.
    if (lhsAsBinary != nullptr && (lhsAsBinary->getOp() == EOpVectorSwizzle || lhsAsBinary->getOp() == EOpIndexDirect)) {
        lhsAsAggregate = lhsAsBinary->getLeft()->getAsAggregate();
        lhsIsSwizzle = true;
    }

    // Operands of the image load: [0] is the image object, [1] is the coordinate.
    TIntermTyped* object = lhsAsAggregate->getSequence()[0]->getAsTyped();
    TIntermTyped* coord  = lhsAsAggregate->getSequence()[1]->getAsTyped();

    const TSampler& texSampler = object->getType().getSampler();

    // Type used for the temporaries and for the resulting sequence.
    TType objDerefType;
    getTextureReturnType(texSampler, objDerefType);

    if (nodeAsBinary) {
        TIntermTyped* rhs = nodeAsBinary->getRight();
        const TOperator assignOp = nodeAsBinary->getOp();

        // True for compound assignments, which need the current value loaded first.
        bool isModifyOp = false;

        switch (assignOp) {
        case EOpAddAssign:
        case EOpSubAssign:
        case EOpMulAssign:
        case EOpVectorTimesMatrixAssign:
        case EOpVectorTimesScalarAssign:
        case EOpMatrixTimesScalarAssign:
        case EOpMatrixTimesMatrixAssign:
        case EOpDivAssign:
        case EOpModAssign:
        case EOpAndAssign:
        case EOpInclusiveOrAssign:
        case EOpExclusiveOrAssign:
        case EOpLeftShiftAssign:
        case EOpRightShiftAssign:
            isModifyOp = true;
            // fall through...
        case EOpAssign:
            {
                // Since this is an lvalue, we'll convert an image load to a sequence like this
                // (to still provide the value):
                //   OpSequence
                //      OpImageStore(object, lhs, rhs)
                //      rhs
                // But if it's not a simple symbol RHS (say, a fn call), we don't want to duplicate the RHS,
                // so we'll convert instead to this:
                //   OpSequence
                //      rhsTmp = rhs
                //      OpImageStore(object, coord, rhsTmp)
                //      rhsTmp
                // If this is a read-modify-write op, like +=, we issue:
                //   OpSequence
                //      coordtmp = load's param1
                //      rhsTmp = OpImageLoad(object, coordTmp)
                //      rhsTmp op= rhs
                //      OpImageStore(object, coordTmp, rhsTmp)
                //      rhsTmp
                //
                // If the lvalue is swizzled, we apply that when writing the temp variable, like so:
                //    ...
                //    rhsTmp.some_swizzle = ...
                // For partial writes, an error is generated.

                // A symbol RHS can be stored directly; anything else goes through a temp.
                TIntermSymbol* rhsTmp = rhs->getAsSymbolNode();
                TIntermTyped* coordTmp = coord;

                if (rhsTmp == nullptr || isModifyOp || lhsIsSwizzle) {
                    rhsTmp = makeInternalVariableNode(loc, "storeTemp", objDerefType);

                    // Partial updates not yet supported
                    // (the error is reported, but construction of the sequence continues).
                    if (!writesAllComponents(rhsTmp, lhsAsBinary)) {
                        error(loc, "unimplemented: partial image updates", "", "");
                    }

                    // Assign storeTemp = rhs
                    if (isModifyOp) {
                        // We have to make a temp var for the coordinate, to avoid evaluating it twice.
                        coordTmp = makeInternalVariableNode(loc, "coordTemp", coord->getType());
                        makeBinary(EOpAssign, coordTmp, coord); // coordtmp = load[param1]
                        makeLoad(rhsTmp, object, coordTmp, objDerefType); // rhsTmp = OpImageLoad(object, coordTmp)
                    }

                    // rhsTmp op= rhs.
                    makeBinary(assignOp, addSwizzle(intermediate.addSymbol(*rhsTmp), lhsAsBinary), rhs);
                }

                makeStore(object, coordTmp, rhsTmp);         // add a store
                return finishSequence(rhsTmp, objDerefType); // return rhsTmp from sequence
            }

        default:
            break;
        }
    }

    if (nodeAsUnary) {
        const TOperator assignOp = nodeAsUnary->getOp();

        switch (assignOp) {
        case EOpPreIncrement:
        case EOpPreDecrement:
            {
                // We turn this into:
                //   OpSequence
                //      coordtmp = load's param1
                //      rhsTmp = OpImageLoad(object, coordTmp)
                //      rhsTmp op
                //      OpImageStore(object, coordTmp, rhsTmp)
                //      rhsTmp

                TIntermSymbol* rhsTmp = makeInternalVariableNode(loc, "storeTemp", objDerefType);
                TIntermTyped* coordTmp = makeInternalVariableNode(loc, "coordTemp", coord->getType());

                makeBinary(EOpAssign, coordTmp, coord);           // coordtmp = load[param1]
                makeLoad(rhsTmp, object, coordTmp, objDerefType); // rhsTmp = OpImageLoad(object, coordTmp)
                makeUnary(assignOp, rhsTmp);                      // op rhsTmp
                makeStore(object, coordTmp, rhsTmp);              // OpImageStore(object, coordTmp, rhsTmp)
                return finishSequence(rhsTmp, objDerefType);      // return rhsTmp from sequence
            }

        case EOpPostIncrement:
        case EOpPostDecrement:
            {
                // We turn this into:
                //   OpSequence
                //      coordtmp = load's param1
                //      rhsTmp1 = OpImageLoad(object, coordTmp)
                //      rhsTmp2 = rhsTmp1
                //      rhsTmp2 op
                //      OpImageStore(object, coordTmp, rhsTmp2)
                //      rhsTmp1 (pre-op value)
                TIntermSymbol* rhsTmp1 = makeInternalVariableNode(loc, "storeTempPre",  objDerefType);
                TIntermSymbol* rhsTmp2 = makeInternalVariableNode(loc, "storeTempPost", objDerefType);
                TIntermTyped* coordTmp = makeInternalVariableNode(loc, "coordTemp", coord->getType());

                makeBinary(EOpAssign, coordTmp, coord);            // coordtmp = load[param1]
                makeLoad(rhsTmp1, object, coordTmp, objDerefType); // rhsTmp1 = OpImageLoad(object, coordTmp)
                makeBinary(EOpAssign, rhsTmp2, rhsTmp1);           // rhsTmp2 = rhsTmp1
                makeUnary(assignOp, rhsTmp2);                      // rhsTmp2 op
                makeStore(object, coordTmp, rhsTmp2);              // OpImageStore(object, coordTmp, rhsTmp2)
                return finishSequence(rhsTmp1, objDerefType);      // return rhsTmp1 (pre-op value) from sequence
            }

        default:
            break;
        }
    }

    // No conversion matched the operator: fall back to plain l-value checking.
    if (lhs)
        if (lValueErrorCheck(loc, op, lhs))
            return nullptr;

    return node;
}
    521 
    522 void HlslParseContext::handlePragma(const TSourceLoc& loc, const TVector<TString>& tokens)
    523 {
    524     if (pragmaCallback)
    525         pragmaCallback(loc.line, tokens);
    526 
    527     if (tokens.size() == 0)
    528         return;
    529 
    530     // These pragmas are case insensitive in HLSL, so we'll compare in lower case.
    531     TVector<TString> lowerTokens = tokens;
    532 
    533     for (auto it = lowerTokens.begin(); it != lowerTokens.end(); ++it)
    534         std::transform(it->begin(), it->end(), it->begin(), ::tolower);
    535 
    536     // Handle pack_matrix
    537     if (tokens.size() == 4 && lowerTokens[0] == "pack_matrix" && tokens[1] == "(" && tokens[3] == ")") {
    538         // Note that HLSL semantic order is Mrc, not Mcr like SPIR-V, so we reverse the sense.
    539         // Row major becomes column major and vice versa.
    540 
    541         if (lowerTokens[2] == "row_major") {
    542             globalUniformDefaults.layoutMatrix = globalBufferDefaults.layoutMatrix = ElmColumnMajor;
    543         } else if (lowerTokens[2] == "column_major") {
    544             globalUniformDefaults.layoutMatrix = globalBufferDefaults.layoutMatrix = ElmRowMajor;
    545         } else {
    546             // unknown majorness strings are treated as (HLSL column major)==(SPIR-V row major)
    547             warn(loc, "unknown pack_matrix pragma value", tokens[2].c_str(), "");
    548             globalUniformDefaults.layoutMatrix = globalBufferDefaults.layoutMatrix = ElmRowMajor;
    549         }
    550         return;
    551     }
    552 
    553     // Handle once
    554     if (lowerTokens[0] == "once") {
    555         warn(loc, "not implemented", "#pragma once", "");
    556         return;
    557     }
    558 }
    559 
    560 //
    561 // Look at a '.' matrix selector string and change it into components
    562 // for a matrix. There are two types:
    563 //
    564 //   _21    second row, first column (one based)
    565 //   _m21   third row, second column (zero based)
    566 //
    567 // Returns true if there is no error.
    568 //
    569 bool HlslParseContext::parseMatrixSwizzleSelector(const TSourceLoc& loc, const TString& fields, int cols, int rows,
    570                                                   TSwizzleSelectors<TMatrixSelector>& components)
    571 {
    572     int startPos[MaxSwizzleSelectors];
    573     int numComps = 0;
    574     TString compString = fields;
    575 
    576     // Find where each component starts,
    577     // recording the first character position after the '_'.
    578     for (size_t c = 0; c < compString.size(); ++c) {
    579         if (compString[c] == '_') {
    580             if (numComps >= MaxSwizzleSelectors) {
    581                 error(loc, "matrix component swizzle has too many components", compString.c_str(), "");
    582                 return false;
    583             }
    584             if (c > compString.size() - 3 ||
    585                     ((compString[c+1] == 'm' || compString[c+1] == 'M') && c > compString.size() - 4)) {
    586                 error(loc, "matrix component swizzle missing", compString.c_str(), "");
    587                 return false;
    588             }
    589             startPos[numComps++] = (int)c + 1;
    590         }
    591     }
    592 
    593     // Process each component
    594     for (int i = 0; i < numComps; ++i) {
    595         int pos = startPos[i];
    596         int bias = -1;
    597         if (compString[pos] == 'm' || compString[pos] == 'M') {
    598             bias = 0;
    599             ++pos;
    600         }
    601         TMatrixSelector comp;
    602         comp.coord1 = compString[pos+0] - '0' + bias;
    603         comp.coord2 = compString[pos+1] - '0' + bias;
    604         if (comp.coord1 < 0 || comp.coord1 >= cols) {
    605             error(loc, "matrix row component out of range", compString.c_str(), "");
    606             return false;
    607         }
    608         if (comp.coord2 < 0 || comp.coord2 >= rows) {
    609             error(loc, "matrix column component out of range", compString.c_str(), "");
    610             return false;
    611         }
    612         components.push_back(comp);
    613     }
    614 
    615     return true;
    616 }
    617 
    618 // If the 'comps' express a column of a matrix,
    619 // return the column.  Column means the first coords all match.
    620 //
    621 // Otherwise, return -1.
    622 //
    623 int HlslParseContext::getMatrixComponentsColumn(int rows, const TSwizzleSelectors<TMatrixSelector>& selector)
    624 {
    625     int col = -1;
    626 
    627     // right number of comps?
    628     if (selector.size() != rows)
    629         return -1;
    630 
    631     // all comps in the same column?
    632     // rows in order?
    633     col = selector[0].coord1;
    634     for (int i = 0; i < rows; ++i) {
    635         if (col != selector[i].coord1)
    636             return -1;
    637         if (i != selector[i].coord2)
    638             return -1;
    639     }
    640 
    641     return col;
    642 }
    643 
    644 //
    645 // Handle seeing a variable identifier in the grammar.
    646 //
    647 TIntermTyped* HlslParseContext::handleVariable(const TSourceLoc& loc, const TString* string)
    648 {
    649     int thisDepth;
    650     TSymbol* symbol = symbolTable.find(*string, thisDepth);
    651     if (symbol && symbol->getAsVariable() && symbol->getAsVariable()->isUserType()) {
    652         error(loc, "expected symbol, not user-defined type", string->c_str(), "");
    653         return nullptr;
    654     }
    655 
    656     // Error check for requiring specific extensions present.
    657     if (symbol && symbol->getNumExtensions())
    658         requireExtensions(loc, symbol->getNumExtensions(), symbol->getExtensions(), symbol->getName().c_str());
    659 
    660     const TVariable* variable = nullptr;
    661     const TAnonMember* anon = symbol ? symbol->getAsAnonMember() : nullptr;
    662     TIntermTyped* node = nullptr;
    663     if (anon) {
    664         // It was a member of an anonymous container, which could be a 'this' structure.
    665 
    666         // Create a subtree for its dereference.
    667         if (thisDepth > 0) {
    668             variable = getImplicitThis(thisDepth);
    669             if (variable == nullptr)
    670                 error(loc, "cannot access member variables (static member function?)", "this", "");
    671         }
    672         if (variable == nullptr)
    673             variable = anon->getAnonContainer().getAsVariable();
    674 
    675         TIntermTyped* container = intermediate.addSymbol(*variable, loc);
    676         TIntermTyped* constNode = intermediate.addConstantUnion(anon->getMemberNumber(), loc);
    677         node = intermediate.addIndex(EOpIndexDirectStruct, container, constNode, loc);
    678 
    679         node->setType(*(*variable->getType().getStruct())[anon->getMemberNumber()].type);
    680         if (node->getType().hiddenMember())
    681             error(loc, "member of nameless block was not redeclared", string->c_str(), "");
    682     } else {
    683         // Not a member of an anonymous container.
    684 
    685         // The symbol table search was done in the lexical phase.
    686         // See if it was a variable.
    687         variable = symbol ? symbol->getAsVariable() : nullptr;
    688         if (variable) {
    689             if ((variable->getType().getBasicType() == EbtBlock ||
    690                 variable->getType().getBasicType() == EbtStruct) && variable->getType().getStruct() == nullptr) {
    691                 error(loc, "cannot be used (maybe an instance name is needed)", string->c_str(), "");
    692                 variable = nullptr;
    693             }
    694         } else {
    695             if (symbol)
    696                 error(loc, "variable name expected", string->c_str(), "");
    697         }
    698 
    699         // Recovery, if it wasn't found or was not a variable.
    700         if (variable == nullptr) {
    701             error(loc, "unknown variable", string->c_str(), "");
    702             variable = new TVariable(string, TType(EbtVoid));
    703         }
    704 
    705         if (variable->getType().getQualifier().isFrontEndConstant())
    706             node = intermediate.addConstantUnion(variable->getConstArray(), variable->getType(), loc);
    707         else
    708             node = intermediate.addSymbol(*variable, loc);
    709     }
    710 
    711     if (variable->getType().getQualifier().isIo())
    712         intermediate.addIoAccessed(*string);
    713 
    714     return node;
    715 }
    716 
    717 //
    718 // Handle operator[] on any objects it applies to.  Currently:
    719 //    Textures
    720 //    Buffers
    721 //
    722 TIntermTyped* HlslParseContext::handleBracketOperator(const TSourceLoc& loc, TIntermTyped* base, TIntermTyped* index)
    723 {
    724     // handle r-value operator[] on textures and images.  l-values will be processed later.
    725     if (base->getType().getBasicType() == EbtSampler && !base->isArray()) {
    726         const TSampler& sampler = base->getType().getSampler();
    727         if (sampler.isImage() || sampler.isTexture()) {
    728             if (! mipsOperatorMipArg.empty() && mipsOperatorMipArg.back().mipLevel == nullptr) {
    729                 // The first operator[] to a .mips[] sequence is the mip level.  We'll remember it.
    730                 mipsOperatorMipArg.back().mipLevel = index;
    731                 return base;  // next [] index is to the same base.
    732             } else {
    733                 TIntermAggregate* load = new TIntermAggregate(sampler.isImage() ? EOpImageLoad : EOpTextureFetch);
    734 
    735                 TType sampReturnType;
    736                 getTextureReturnType(sampler, sampReturnType);
    737 
    738                 load->setType(sampReturnType);
    739                 load->setLoc(loc);
    740                 load->getSequence().push_back(base);
    741                 load->getSequence().push_back(index);
    742 
    743                 // Textures need a MIP.  If we saw one go by, use it.  Otherwise, use zero.
    744                 if (sampler.isTexture()) {
    745                     if (! mipsOperatorMipArg.empty()) {
    746                         load->getSequence().push_back(mipsOperatorMipArg.back().mipLevel);
    747                         mipsOperatorMipArg.pop_back();
    748                     } else {
    749                         load->getSequence().push_back(intermediate.addConstantUnion(0, loc, true));
    750                     }
    751                 }
    752 
    753                 return load;
    754             }
    755         }
    756     }
    757 
    758     // Handle operator[] on structured buffers: this indexes into the array element of the buffer.
    759     // indexStructBufferContent returns nullptr if it isn't a structuredbuffer (SSBO).
    760     TIntermTyped* sbArray = indexStructBufferContent(loc, base);
    761     if (sbArray != nullptr) {
    762         if (sbArray == nullptr)
    763             return nullptr;
    764 
    765         // Now we'll apply the [] index to that array
    766         const TOperator idxOp = (index->getQualifier().storage == EvqConst) ? EOpIndexDirect : EOpIndexIndirect;
    767 
    768         TIntermTyped* element = intermediate.addIndex(idxOp, sbArray, index, loc);
    769         const TType derefType(sbArray->getType(), 0);
    770         element->setType(derefType);
    771         return element;
    772     }
    773 
    774     return nullptr;
    775 }
    776 
    777 //
    778 // Cast index value to a uint if it isn't already (for operator[], load indexes, etc)
    779 TIntermTyped* HlslParseContext::makeIntegerIndex(TIntermTyped* index)
    780 {
    781     const TBasicType indexBasicType = index->getType().getBasicType();
    782     const int vecSize = index->getType().getVectorSize();
    783 
    784     // We can use int types directly as the index
    785     if (indexBasicType == EbtInt || indexBasicType == EbtUint ||
    786         indexBasicType == EbtInt64 || indexBasicType == EbtUint64)
    787         return index;
    788 
    789     // Cast index to unsigned integer if it isn't one.
    790     return intermediate.addConversion(EOpConstructUint, TType(EbtUint, EvqTemporary, vecSize), index);
    791 }
    792 
    793 //
    794 // Handle seeing a base[index] dereference in the grammar.
    795 //
    796 TIntermTyped* HlslParseContext::handleBracketDereference(const TSourceLoc& loc, TIntermTyped* base, TIntermTyped* index)
    797 {
    798     index = makeIntegerIndex(index);
    799 
    800     if (index == nullptr) {
    801         error(loc, " unknown index type ", "", "");
    802         return nullptr;
    803     }
    804 
    805     TIntermTyped* result = handleBracketOperator(loc, base, index);
    806 
    807     if (result != nullptr)
    808         return result;  // it was handled as an operator[]
    809 
    810     bool flattened = false;
    811     int indexValue = 0;
    812     if (index->getQualifier().isFrontEndConstant())
    813         indexValue = index->getAsConstantUnion()->getConstArray()[0].getIConst();
    814 
    815     variableCheck(base);
    816     if (! base->isArray() && ! base->isMatrix() && ! base->isVector()) {
    817         if (base->getAsSymbolNode())
    818             error(loc, " left of '[' is not of type array, matrix, or vector ",
    819                   base->getAsSymbolNode()->getName().c_str(), "");
    820         else
    821             error(loc, " left of '[' is not of type array, matrix, or vector ", "expression", "");
    822     } else if (base->getType().getQualifier().storage == EvqConst && index->getQualifier().storage == EvqConst) {
    823         // both base and index are front-end constants
    824         checkIndex(loc, base->getType(), indexValue);
    825         return intermediate.foldDereference(base, indexValue, loc);
    826     } else {
    827         // at least one of base and index is variable...
    828 
    829         if (index->getQualifier().isFrontEndConstant())
    830             checkIndex(loc, base->getType(), indexValue);
    831 
    832         if (base->getType().isScalarOrVec1())
    833             result = base;
    834         else if (base->getAsSymbolNode() && wasFlattened(base)) {
    835             if (index->getQualifier().storage != EvqConst)
    836                 error(loc, "Invalid variable index to flattened array", base->getAsSymbolNode()->getName().c_str(), "");
    837 
    838             result = flattenAccess(base, indexValue);
    839             flattened = (result != base);
    840         } else {
    841             if (index->getQualifier().isFrontEndConstant()) {
    842                 if (base->getType().isUnsizedArray())
    843                     base->getWritableType().updateImplicitArraySize(indexValue + 1);
    844                 else
    845                     checkIndex(loc, base->getType(), indexValue);
    846                 result = intermediate.addIndex(EOpIndexDirect, base, index, loc);
    847             } else
    848                 result = intermediate.addIndex(EOpIndexIndirect, base, index, loc);
    849         }
    850     }
    851 
    852     if (result == nullptr) {
    853         // Insert dummy error-recovery result
    854         result = intermediate.addConstantUnion(0.0, EbtFloat, loc);
    855     } else {
    856         // If the array reference was flattened, it has the correct type.  E.g, if it was
    857         // a uniform array, it was flattened INTO a set of scalar uniforms, not scalar temps.
    858         // In that case, we preserve the qualifiers.
    859         if (!flattened) {
    860             // Insert valid dereferenced result
    861             TType newType(base->getType(), 0);  // dereferenced type
    862             if (base->getType().getQualifier().storage == EvqConst && index->getQualifier().storage == EvqConst)
    863                 newType.getQualifier().storage = EvqConst;
    864             else
    865                 newType.getQualifier().storage = EvqTemporary;
    866             result->setType(newType);
    867         }
    868     }
    869 
    870     return result;
    871 }
    872 
    873 // Handle seeing a binary node with a math operation.
    874 TIntermTyped* HlslParseContext::handleBinaryMath(const TSourceLoc& loc, const char* str, TOperator op,
    875                                                  TIntermTyped* left, TIntermTyped* right)
    876 {
    877     TIntermTyped* result = intermediate.addBinaryMath(op, left, right, loc);
    878     if (result == nullptr)
    879         binaryOpError(loc, str, left->getCompleteString(), right->getCompleteString());
    880 
    881     return result;
    882 }
    883 
    884 // Handle seeing a unary node with a math operation.
    885 TIntermTyped* HlslParseContext::handleUnaryMath(const TSourceLoc& loc, const char* str, TOperator op,
    886                                                 TIntermTyped* childNode)
    887 {
    888     TIntermTyped* result = intermediate.addUnaryMath(op, childNode, loc);
    889 
    890     if (result)
    891         return result;
    892     else
    893         unaryOpError(loc, str, childNode->getCompleteString());
    894 
    895     return childNode;
    896 }
    897 //
    898 // Return true if the name is a struct buffer method
    899 //
    900 bool HlslParseContext::isStructBufferMethod(const TString& name) const
    901 {
    902     return
    903         name == "GetDimensions"              ||
    904         name == "Load"                       ||
    905         name == "Load2"                      ||
    906         name == "Load3"                      ||
    907         name == "Load4"                      ||
    908         name == "Store"                      ||
    909         name == "Store2"                     ||
    910         name == "Store3"                     ||
    911         name == "Store4"                     ||
    912         name == "InterlockedAdd"             ||
    913         name == "InterlockedAnd"             ||
    914         name == "InterlockedCompareExchange" ||
    915         name == "InterlockedCompareStore"    ||
    916         name == "InterlockedExchange"        ||
    917         name == "InterlockedMax"             ||
    918         name == "InterlockedMin"             ||
    919         name == "InterlockedOr"              ||
    920         name == "InterlockedXor"             ||
    921         name == "IncrementCounter"           ||
    922         name == "DecrementCounter"           ||
    923         name == "Append"                     ||
    924         name == "Consume";
    925 }
    926 
    927 //
    928 // Handle seeing a base.field dereference in the grammar, where 'field' is a
    929 // swizzle or member variable.
    930 //
    931 TIntermTyped* HlslParseContext::handleDotDereference(const TSourceLoc& loc, TIntermTyped* base, const TString& field)
    932 {
    933     variableCheck(base);
    934 
    935     if (base->isArray()) {
    936         error(loc, "cannot apply to an array:", ".", field.c_str());
    937         return base;
    938     }
    939 
    940     TIntermTyped* result = base;
    941 
    942     if (base->getType().getBasicType() == EbtSampler) {
    943         // Handle .mips[mipid][pos] operation on textures
    944         const TSampler& sampler = base->getType().getSampler();
    945         if (sampler.isTexture() && field == "mips") {
    946             // Push a null to signify that we expect a mip level under operator[] next.
    947             mipsOperatorMipArg.push_back(tMipsOperatorData(loc, nullptr));
    948             // Keep 'result' pointing to 'base', since we expect an operator[] to go by next.
    949         } else {
    950             if (field == "mips")
    951                 error(loc, "unexpected texture type for .mips[][] operator:",
    952                       base->getType().getCompleteString().c_str(), "");
    953             else
    954                 error(loc, "unexpected operator on texture type:", field.c_str(),
    955                       base->getType().getCompleteString().c_str());
    956         }
    957     } else if (base->isVector() || base->isScalar()) {
    958         TSwizzleSelectors<TVectorSelector> selectors;
    959         parseSwizzleSelector(loc, field, base->getVectorSize(), selectors);
    960 
    961         if (base->isScalar()) {
    962             if (selectors.size() == 1)
    963                 return result;
    964             else {
    965                 TType type(base->getBasicType(), EvqTemporary, selectors.size());
    966                 return addConstructor(loc, base, type);
    967             }
    968         }
    969         if (base->getVectorSize() == 1) {
    970             TType scalarType(base->getBasicType(), EvqTemporary, 1);
    971             if (selectors.size() == 1)
    972                 return addConstructor(loc, base, scalarType);
    973             else {
    974                 TType vectorType(base->getBasicType(), EvqTemporary, selectors.size());
    975                 return addConstructor(loc, addConstructor(loc, base, scalarType), vectorType);
    976             }
    977         }
    978 
    979         if (base->getType().getQualifier().isFrontEndConstant())
    980             result = intermediate.foldSwizzle(base, selectors, loc);
    981         else {
    982             if (selectors.size() == 1) {
    983                 TIntermTyped* index = intermediate.addConstantUnion(selectors[0], loc);
    984                 result = intermediate.addIndex(EOpIndexDirect, base, index, loc);
    985                 result->setType(TType(base->getBasicType(), EvqTemporary));
    986             } else {
    987                 TIntermTyped* index = intermediate.addSwizzle(selectors, loc);
    988                 result = intermediate.addIndex(EOpVectorSwizzle, base, index, loc);
    989                 result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision,
    990                                 selectors.size()));
    991             }
    992         }
    993     } else if (base->isMatrix()) {
    994         TSwizzleSelectors<TMatrixSelector> selectors;
    995         if (! parseMatrixSwizzleSelector(loc, field, base->getMatrixCols(), base->getMatrixRows(), selectors))
    996             return result;
    997 
    998         if (selectors.size() == 1) {
    999             // Representable by m[c][r]
   1000             if (base->getType().getQualifier().isFrontEndConstant()) {
   1001                 result = intermediate.foldDereference(base, selectors[0].coord1, loc);
   1002                 result = intermediate.foldDereference(result, selectors[0].coord2, loc);
   1003             } else {
   1004                 result = intermediate.addIndex(EOpIndexDirect, base,
   1005                                                intermediate.addConstantUnion(selectors[0].coord1, loc),
   1006                                                loc);
   1007                 TType dereferencedCol(base->getType(), 0);
   1008                 result->setType(dereferencedCol);
   1009                 result = intermediate.addIndex(EOpIndexDirect, result,
   1010                                                intermediate.addConstantUnion(selectors[0].coord2, loc),
   1011                                                loc);
   1012                 TType dereferenced(dereferencedCol, 0);
   1013                 result->setType(dereferenced);
   1014             }
   1015         } else {
   1016             int column = getMatrixComponentsColumn(base->getMatrixRows(), selectors);
   1017             if (column >= 0) {
   1018                 // Representable by m[c]
   1019                 if (base->getType().getQualifier().isFrontEndConstant())
   1020                     result = intermediate.foldDereference(base, column, loc);
   1021                 else {
   1022                     result = intermediate.addIndex(EOpIndexDirect, base, intermediate.addConstantUnion(column, loc),
   1023                                                    loc);
   1024                     TType dereferenced(base->getType(), 0);
   1025                     result->setType(dereferenced);
   1026                 }
   1027             } else {
   1028                 // general case, not a column, not a single component
   1029                 TIntermTyped* index = intermediate.addSwizzle(selectors, loc);
   1030                 result = intermediate.addIndex(EOpMatrixSwizzle, base, index, loc);
   1031                 result->setType(TType(base->getBasicType(), EvqTemporary, base->getType().getQualifier().precision,
   1032                                       selectors.size()));
   1033            }
   1034         }
   1035     } else if (base->getBasicType() == EbtStruct || base->getBasicType() == EbtBlock) {
   1036         const TTypeList* fields = base->getType().getStruct();
   1037         bool fieldFound = false;
   1038         int member;
   1039         for (member = 0; member < (int)fields->size(); ++member) {
   1040             if ((*fields)[member].type->getFieldName() == field) {
   1041                 fieldFound = true;
   1042                 break;
   1043             }
   1044         }
   1045         if (fieldFound) {
   1046             if (base->getAsSymbolNode() && wasFlattened(base)) {
   1047                 result = flattenAccess(base, member);
   1048             } else {
   1049                 if (base->getType().getQualifier().storage == EvqConst)
   1050                     result = intermediate.foldDereference(base, member, loc);
   1051                 else {
   1052                     TIntermTyped* index = intermediate.addConstantUnion(member, loc);
   1053                     result = intermediate.addIndex(EOpIndexDirectStruct, base, index, loc);
   1054                     result->setType(*(*fields)[member].type);
   1055                 }
   1056             }
   1057         } else
   1058             error(loc, "no such field in structure", field.c_str(), "");
   1059     } else
   1060         error(loc, "does not apply to this type:", field.c_str(), base->getType().getCompleteString().c_str());
   1061 
   1062     return result;
   1063 }
   1064 
   1065 //
   1066 // Return true if the field should be treated as a built-in method.
   1067 // Return false otherwise.
   1068 //
   1069 bool HlslParseContext::isBuiltInMethod(const TSourceLoc&, TIntermTyped* base, const TString& field)
   1070 {
   1071     if (base == nullptr)
   1072         return false;
   1073 
   1074     variableCheck(base);
   1075 
   1076     if (base->getType().getBasicType() == EbtSampler) {
   1077         return true;
   1078     } else if (isStructBufferType(base->getType()) && isStructBufferMethod(field)) {
   1079         return true;
   1080     } else if (field == "Append" ||
   1081                field == "RestartStrip") {
   1082         // We cannot check the type here: it may be sanitized if we're not compiling a geometry shader, but
   1083         // the code is around in the shader source.
   1084         return true;
   1085     } else
   1086         return false;
   1087 }
   1088 
   1089 // Independently establish a built-in that is a member of a structure.
   1090 // 'arraySizes' are what's desired for the independent built-in, whatever
   1091 // the higher-level source/expression of them was.
   1092 void HlslParseContext::splitBuiltIn(const TString& baseName, const TType& memberType, const TArraySizes* arraySizes,
   1093                                     const TQualifier& outerQualifier)
   1094 {
   1095     // Because of arrays of structs, we might be asked more than once,
   1096     // but the arraySizes passed in should have captured the whole thing
   1097     // the first time.
   1098     // However, clip/cull rely on multiple updates.
   1099     if (!isClipOrCullDistance(memberType))
   1100         if (splitBuiltIns.find(tInterstageIoData(memberType.getQualifier().builtIn, outerQualifier.storage)) !=
   1101             splitBuiltIns.end())
   1102             return;
   1103 
   1104     TVariable* ioVar = makeInternalVariable(baseName + "." + memberType.getFieldName(), memberType);
   1105 
   1106     if (arraySizes != nullptr && !memberType.isArray())
   1107         ioVar->getWritableType().copyArraySizes(*arraySizes);
   1108 
   1109     splitBuiltIns[tInterstageIoData(memberType.getQualifier().builtIn, outerQualifier.storage)] = ioVar;
   1110     if (!isClipOrCullDistance(ioVar->getType()))
   1111         trackLinkage(*ioVar);
   1112 
   1113     // Merge qualifier from the user structure
   1114     mergeQualifiers(ioVar->getWritableType().getQualifier(), outerQualifier);
   1115 
   1116     // Fix the builtin type if needed (e.g, some types require fixed array sizes, no matter how the
   1117     // shader declared them).  This is done after mergeQualifiers(), in case fixBuiltInIoType looks
   1118     // at the qualifier to determine e.g, in or out qualifications.
   1119     fixBuiltInIoType(ioVar->getWritableType());
   1120 
   1121     // But, not location, we're losing that
   1122     ioVar->getWritableType().getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
   1123 }
   1124 
   1125 // Split a type into
   1126 //   1. a struct of non-I/O members
   1127 //   2. a collection of independent I/O variables
   1128 void HlslParseContext::split(const TVariable& variable)
   1129 {
   1130     // Create a new variable:
   1131     const TType& clonedType = *variable.getType().clone();
   1132     const TType& splitType = split(clonedType, variable.getName(), clonedType.getQualifier());
   1133     splitNonIoVars[variable.getUniqueId()] = makeInternalVariable(variable.getName(), splitType);
   1134 }
   1135 
   1136 // Recursive implementation of split().
   1137 // Returns reference to the modified type.
   1138 const TType& HlslParseContext::split(const TType& type, const TString& name, const TQualifier& outerQualifier)
   1139 {
   1140     if (type.isStruct()) {
   1141         TTypeList* userStructure = type.getWritableStruct();
   1142         for (auto ioType = userStructure->begin(); ioType != userStructure->end(); ) {
   1143             if (ioType->type->isBuiltIn()) {
   1144                 // move out the built-in
   1145                 splitBuiltIn(name, *ioType->type, type.getArraySizes(), outerQualifier);
   1146                 ioType = userStructure->erase(ioType);
   1147             } else {
   1148                 split(*ioType->type, name + "." + ioType->type->getFieldName(), outerQualifier);
   1149                 ++ioType;
   1150             }
   1151         }
   1152     }
   1153 
   1154     return type;
   1155 }
   1156 
   1157 // Is this an aggregate that should be flattened?
   1158 // Can be applied to intermediate levels of type in a hierarchy.
   1159 // Some things like flattening uniform arrays are only about the top level
   1160 // of the aggregate, triggered on 'topLevel'.
   1161 bool HlslParseContext::shouldFlatten(const TType& type, TStorageQualifier qualifier, bool topLevel) const
   1162 {
   1163     switch (qualifier) {
   1164     case EvqVaryingIn:
   1165     case EvqVaryingOut:
   1166         return type.isStruct() || type.isArray();
   1167     case EvqUniform:
   1168         return (type.isArray() && intermediate.getFlattenUniformArrays() && topLevel) ||
   1169                (type.isStruct() && type.containsOpaque());
   1170     default:
   1171         return false;
   1172     };
   1173 }
   1174 
   1175 // Top level variable flattening: construct data
   1176 void HlslParseContext::flatten(const TVariable& variable, bool linkage)
   1177 {
   1178     const TType& type = variable.getType();
   1179 
   1180     // If it's a standalone built-in, there is nothing to flatten
   1181     if (type.isBuiltIn() && !type.isStruct())
   1182         return;
   1183 
   1184     auto entry = flattenMap.insert(std::make_pair(variable.getUniqueId(),
   1185                                                   TFlattenData(type.getQualifier().layoutBinding,
   1186                                                                type.getQualifier().layoutLocation)));
   1187 
   1188     // the item is a map pair, so first->second is the TFlattenData itself.
   1189     flatten(variable, type, entry.first->second, variable.getName(), linkage, type.getQualifier(), nullptr);
   1190 }
   1191 
   1192 // Recursively flatten the given variable at the provided type, building the flattenData as we go.
   1193 //
   1194 // This is mutually recursive with flattenStruct and flattenArray.
   1195 // We are going to flatten an arbitrarily nested composite structure into a linear sequence of
   1196 // members, and later on, we want to turn a path through the tree structure into a final
   1197 // location in this linear sequence.
   1198 //
   1199 // If the tree was N-ary, that can be directly calculated.  However, we are dealing with
   1200 // arbitrary numbers - perhaps a struct of 7 members containing an array of 3.  Thus, we must
   1201 // build a data structure to allow the sequence of bracket and dot operators on arrays and
   1202 // structs to arrive at the proper member.
   1203 //
   1204 // To avoid storing a tree with pointers, we are going to flatten the tree into a vector of integers.
   1205 // The leaves are the indexes into the flattened member array.
   1206 // Each level will have the next location for the Nth item stored sequentially, so for instance:
   1207 //
   1208 // struct { float2 a[2]; int b; float4 c[3] };
   1209 //
   1210 // This will produce the following flattened tree:
   1211 // Pos: 0  1   2    3  4    5  6   7     8   9  10   11  12 13
   1212 //     (3, 7,  8,   5, 6,   0, 1,  2,   11, 12, 13,   3,  4, 5}
   1213 //
   1214 // Given a reference to mystruct.c[1], the access chain is (2,1), so we traverse:
   1215 //   (0+2) = 8  -->  (8+1) = 12 -->   12 = 4
   1216 //
   1217 // so the 4th flattened member in traversal order is ours.
   1218 //
   1219 int HlslParseContext::flatten(const TVariable& variable, const TType& type,
   1220                               TFlattenData& flattenData, TString name, bool linkage,
   1221                               const TQualifier& outerQualifier,
   1222                               const TArraySizes* builtInArraySizes)
   1223 {
   1224     // If something is an arrayed struct, the array flattener will recursively call flatten()
   1225     // to then flatten the struct, so this is an "if else": we don't do both.
   1226     if (type.isArray())
   1227         return flattenArray(variable, type, flattenData, name, linkage, outerQualifier);
   1228     else if (type.isStruct())
   1229         return flattenStruct(variable, type, flattenData, name, linkage, outerQualifier, builtInArraySizes);
   1230     else {
   1231         assert(0); // should never happen
   1232         return -1;
   1233     }
   1234 }
   1235 
   1236 // Add a single flattened member to the flattened data being tracked for the composite
   1237 // Returns true for the final flattening level.
   1238 int HlslParseContext::addFlattenedMember(const TVariable& variable, const TType& type, TFlattenData& flattenData,
   1239                                          const TString& memberName, bool linkage,
   1240                                          const TQualifier& outerQualifier,
   1241                                          const TArraySizes* builtInArraySizes)
   1242 {
   1243     if (!shouldFlatten(type, outerQualifier.storage, false)) {
   1244         // This is as far as we flatten.  Insert the variable.
   1245         TVariable* memberVariable = makeInternalVariable(memberName, type);
   1246         mergeQualifiers(memberVariable->getWritableType().getQualifier(), variable.getType().getQualifier());
   1247 
   1248         if (flattenData.nextBinding != TQualifier::layoutBindingEnd)
   1249             memberVariable->getWritableType().getQualifier().layoutBinding = flattenData.nextBinding++;
   1250 
   1251         if (memberVariable->getType().isBuiltIn()) {
   1252             // inherited locations are nonsensical for built-ins (TODO: what if semantic had a number)
   1253             memberVariable->getWritableType().getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
   1254         } else {
   1255             // inherited locations must be auto bumped, not replicated
   1256             if (flattenData.nextLocation != TQualifier::layoutLocationEnd) {
   1257                 memberVariable->getWritableType().getQualifier().layoutLocation = flattenData.nextLocation;
   1258                 flattenData.nextLocation += intermediate.computeTypeLocationSize(memberVariable->getType(), language);
   1259                 nextOutLocation = std::max(nextOutLocation, flattenData.nextLocation);
   1260             }
   1261         }
   1262 
   1263         flattenData.offsets.push_back(static_cast<int>(flattenData.members.size()));
   1264         flattenData.members.push_back(memberVariable);
   1265 
   1266         if (linkage)
   1267             trackLinkage(*memberVariable);
   1268 
   1269         return static_cast<int>(flattenData.offsets.size()) - 1; // location of the member reference
   1270     } else {
   1271         // Further recursion required
   1272         return flatten(variable, type, flattenData, memberName, linkage, outerQualifier, builtInArraySizes);
   1273     }
   1274 }
   1275 
   1276 // Figure out the mapping between an aggregate's top members and an
   1277 // equivalent set of individual variables.
   1278 //
   1279 // Assumes shouldFlatten() or equivalent was called first.
   1280 int HlslParseContext::flattenStruct(const TVariable& variable, const TType& type,
   1281                                     TFlattenData& flattenData, TString name, bool linkage,
   1282                                     const TQualifier& outerQualifier,
   1283                                     const TArraySizes* builtInArraySizes)
   1284 {
   1285     assert(type.isStruct());
   1286 
   1287     auto members = *type.getStruct();
   1288 
   1289     // Reserve space for this tree level.
   1290     int start = static_cast<int>(flattenData.offsets.size());
   1291     int pos = start;
   1292     flattenData.offsets.resize(int(pos + members.size()), -1);
   1293 
   1294     for (int member = 0; member < (int)members.size(); ++member) {
   1295         TType& dereferencedType = *members[member].type;
   1296         if (dereferencedType.isBuiltIn())
   1297             splitBuiltIn(variable.getName(), dereferencedType, builtInArraySizes, outerQualifier);
   1298         else {
   1299             const int mpos = addFlattenedMember(variable, dereferencedType, flattenData,
   1300                                                 name + "." + dereferencedType.getFieldName(),
   1301                                                 linkage, outerQualifier,
   1302                                                 builtInArraySizes == nullptr && dereferencedType.isArray()
   1303                                                                        ? dereferencedType.getArraySizes()
   1304                                                                        : builtInArraySizes);
   1305             flattenData.offsets[pos++] = mpos;
   1306         }
   1307     }
   1308 
   1309     return start;
   1310 }
   1311 
   1312 // Figure out mapping between an array's members and an
   1313 // equivalent set of individual variables.
   1314 //
   1315 // Assumes shouldFlatten() or equivalent was called first.
   1316 int HlslParseContext::flattenArray(const TVariable& variable, const TType& type,
   1317                                    TFlattenData& flattenData, TString name, bool linkage,
   1318                                    const TQualifier& outerQualifier)
   1319 {
   1320     assert(type.isSizedArray());
   1321 
   1322     const int size = type.getOuterArraySize();
   1323     const TType dereferencedType(type, 0);
   1324 
   1325     if (name.empty())
   1326         name = variable.getName();
   1327 
   1328     // Reserve space for this tree level.
   1329     int start = static_cast<int>(flattenData.offsets.size());
   1330     int pos   = start;
   1331     flattenData.offsets.resize(int(pos + size), -1);
   1332 
   1333     for (int element=0; element < size; ++element) {
   1334         char elementNumBuf[20];  // sufficient for MAXINT
   1335         snprintf(elementNumBuf, sizeof(elementNumBuf)-1, "[%d]", element);
   1336         const int mpos = addFlattenedMember(variable, dereferencedType, flattenData,
   1337                                             name + elementNumBuf, linkage, outerQualifier,
   1338                                             type.getArraySizes());
   1339 
   1340         flattenData.offsets[pos++] = mpos;
   1341     }
   1342 
   1343     return start;
   1344 }
   1345 
   1346 // Return true if we have flattened this node.
   1347 bool HlslParseContext::wasFlattened(const TIntermTyped* node) const
   1348 {
   1349     return node != nullptr && node->getAsSymbolNode() != nullptr &&
   1350            wasFlattened(node->getAsSymbolNode()->getId());
   1351 }
   1352 
   1353 // Return true if we have split this structure
   1354 bool HlslParseContext::wasSplit(const TIntermTyped* node) const
   1355 {
   1356     return node != nullptr && node->getAsSymbolNode() != nullptr &&
   1357            wasSplit(node->getAsSymbolNode()->getId());
   1358 }
   1359 
   1360 // Turn an access into an aggregate that was flattened to instead be
   1361 // an access to the individual variable the member was flattened to.
   1362 // Assumes wasFlattened() or equivalent was called first.
   1363 TIntermTyped* HlslParseContext::flattenAccess(TIntermTyped* base, int member)
   1364 {
   1365     const TType dereferencedType(base->getType(), member);  // dereferenced type
   1366     const TIntermSymbol& symbolNode = *base->getAsSymbolNode();
   1367     TIntermTyped* flattened = flattenAccess(symbolNode.getId(), member, base->getQualifier().storage,
   1368                                             dereferencedType, symbolNode.getFlattenSubset());
   1369 
   1370     return flattened ? flattened : base;
   1371 }
   1372 TIntermTyped* HlslParseContext::flattenAccess(int uniqueId, int member, TStorageQualifier outerStorage,
   1373     const TType& dereferencedType, int subset)
   1374 {
   1375     const auto flattenData = flattenMap.find(uniqueId);
   1376 
   1377     if (flattenData == flattenMap.end())
   1378         return nullptr;
   1379 
   1380     // Calculate new cumulative offset from the packed tree
   1381     int newSubset = flattenData->second.offsets[subset >= 0 ? subset + member : member];
   1382 
   1383     TIntermSymbol* subsetSymbol;
   1384     if (!shouldFlatten(dereferencedType, outerStorage, false)) {
   1385         // Finished flattening: create symbol for variable
   1386         member = flattenData->second.offsets[newSubset];
   1387         const TVariable* memberVariable = flattenData->second.members[member];
   1388         subsetSymbol = intermediate.addSymbol(*memberVariable);
   1389         subsetSymbol->setFlattenSubset(-1);
   1390     } else {
   1391 
   1392         // If this is not the final flattening, accumulate the position and return
   1393         // an object of the partially dereferenced type.
   1394         subsetSymbol = new TIntermSymbol(uniqueId, "flattenShadow", dereferencedType);
   1395         subsetSymbol->setFlattenSubset(newSubset);
   1396     }
   1397 
   1398     return subsetSymbol;
   1399 }
   1400 
   1401 // For finding where the first leaf is in a subtree of a multi-level aggregate
   1402 // that is just getting a subset assigned. Follows the same logic as flattenAccess,
   1403 // but logically going down the "left-most" tree branch each step of the way.
   1404 //
   1405 // Returns the offset into the first leaf of the subset.
   1406 int HlslParseContext::findSubtreeOffset(const TIntermNode& node) const
   1407 {
   1408     const TIntermSymbol* sym = node.getAsSymbolNode();
   1409     if (sym == nullptr)
   1410         return 0;
   1411     if (!sym->isArray() && !sym->isStruct())
   1412         return 0;
   1413     int subset = sym->getFlattenSubset();
   1414     if (subset == -1)
   1415         return 0;
   1416 
   1417     // Getting this far means a partial aggregate is identified by the flatten subset.
   1418     // Find the first leaf of the subset.
   1419 
   1420     const auto flattenData = flattenMap.find(sym->getId());
   1421     if (flattenData == flattenMap.end())
   1422         return 0;
   1423 
   1424     return findSubtreeOffset(sym->getType(), subset, flattenData->second.offsets);
   1425 
   1426     do {
   1427         subset = flattenData->second.offsets[subset];
   1428     } while (true);
   1429 }
   1430 // Recursively do the desent
   1431 int HlslParseContext::findSubtreeOffset(const TType& type, int subset, const TVector<int>& offsets) const
   1432 {
   1433     if (!type.isArray() && !type.isStruct())
   1434         return offsets[subset];
   1435     TType derefType(type, 0);
   1436     return findSubtreeOffset(derefType, offsets[subset], offsets);
   1437 };
   1438 
   1439 // Find and return the split IO TVariable for id, or nullptr if none.
   1440 TVariable* HlslParseContext::getSplitNonIoVar(int id) const
   1441 {
   1442     const auto splitNonIoVar = splitNonIoVars.find(id);
   1443     if (splitNonIoVar == splitNonIoVars.end())
   1444         return nullptr;
   1445 
   1446     return splitNonIoVar->second;
   1447 }
   1448 
   1449 // Pass through to base class after remembering built-in mappings.
   1450 void HlslParseContext::trackLinkage(TSymbol& symbol)
   1451 {
   1452     TBuiltInVariable biType = symbol.getType().getQualifier().builtIn;
   1453 
   1454     if (biType != EbvNone)
   1455         builtInTessLinkageSymbols[biType] = symbol.clone();
   1456 
   1457     TParseContextBase::trackLinkage(symbol);
   1458 }
   1459 
   1460 
   1461 // Returns true if the built-in is a clip or cull distance variable.
   1462 bool HlslParseContext::isClipOrCullDistance(TBuiltInVariable builtIn)
   1463 {
   1464     return builtIn == EbvClipDistance || builtIn == EbvCullDistance;
   1465 }
   1466 
   1467 // Some types require fixed array sizes in SPIR-V, but can be scalars or
   1468 // arrays of sizes SPIR-V doesn't allow.  For example, tessellation factors.
   1469 // This creates the right size.  A conversion is performed when the internal
   1470 // type is copied to or from the external type.  This corrects the externally
   1471 // facing input or output type to abide downstream semantics.
   1472 void HlslParseContext::fixBuiltInIoType(TType& type)
   1473 {
   1474     int requiredArraySize = 0;
   1475     int requiredVectorSize = 0;
   1476 
   1477     switch (type.getQualifier().builtIn) {
   1478     case EbvTessLevelOuter: requiredArraySize = 4; break;
   1479     case EbvTessLevelInner: requiredArraySize = 2; break;
   1480 
   1481     case EbvSampleMask:
   1482         {
   1483             // Promote scalar to array of size 1.  Leave existing arrays alone.
   1484             if (!type.isArray())
   1485                 requiredArraySize = 1;
   1486             break;
   1487         }
   1488 
   1489     case EbvWorkGroupId:        requiredVectorSize = 3; break;
   1490     case EbvGlobalInvocationId: requiredVectorSize = 3; break;
   1491     case EbvLocalInvocationId:  requiredVectorSize = 3; break;
   1492     case EbvTessCoord:          requiredVectorSize = 3; break;
   1493 
   1494     default:
   1495         if (isClipOrCullDistance(type)) {
   1496             const int loc = type.getQualifier().layoutLocation;
   1497 
   1498             if (type.getQualifier().builtIn == EbvClipDistance) {
   1499                 if (type.getQualifier().storage == EvqVaryingIn)
   1500                     clipSemanticNSizeIn[loc] = type.getVectorSize();
   1501                 else
   1502                     clipSemanticNSizeOut[loc] = type.getVectorSize();
   1503             } else {
   1504                 if (type.getQualifier().storage == EvqVaryingIn)
   1505                     cullSemanticNSizeIn[loc] = type.getVectorSize();
   1506                 else
   1507                     cullSemanticNSizeOut[loc] = type.getVectorSize();
   1508             }
   1509         }
   1510 
   1511         return;
   1512     }
   1513 
   1514     // Alter or set vector size as needed.
   1515     if (requiredVectorSize > 0) {
   1516         TType newType(type.getBasicType(), type.getQualifier().storage, requiredVectorSize);
   1517         newType.getQualifier() = type.getQualifier();
   1518 
   1519         type.shallowCopy(newType);
   1520     }
   1521 
   1522     // Alter or set array size as needed.
   1523     if (requiredArraySize > 0) {
   1524         if (!type.isArray() || type.getOuterArraySize() != requiredArraySize) {
   1525             TArraySizes* arraySizes = new TArraySizes;
   1526             arraySizes->addInnerSize(requiredArraySize);
   1527             type.transferArraySizes(arraySizes);
   1528         }
   1529     }
   1530 }
   1531 
   1532 // Variables that correspond to the user-interface in and out of a stage
   1533 // (not the built-in interface) are
   1534 //  - assigned locations
   1535 //  - registered as a linkage node (part of the stage's external interface).
   1536 // Assumes it is called in the order in which locations should be assigned.
   1537 void HlslParseContext::assignToInterface(TVariable& variable)
   1538 {
   1539     const auto assignLocation = [&](TVariable& variable) {
   1540         TType& type = variable.getWritableType();
   1541         if (!type.isStruct() || type.getStruct()->size() > 0) {
   1542             TQualifier& qualifier = type.getQualifier();
   1543             if (qualifier.storage == EvqVaryingIn || qualifier.storage == EvqVaryingOut) {
   1544                 if (qualifier.builtIn == EbvNone && !qualifier.hasLocation()) {
   1545                     // Strip off the outer array dimension for those having an extra one.
   1546                     int size;
   1547                     if (type.isArray() && qualifier.isArrayedIo(language)) {
   1548                         TType elementType(type, 0);
   1549                         size = intermediate.computeTypeLocationSize(elementType, language);
   1550                     } else
   1551                         size = intermediate.computeTypeLocationSize(type, language);
   1552 
   1553                     if (qualifier.storage == EvqVaryingIn) {
   1554                         variable.getWritableType().getQualifier().layoutLocation = nextInLocation;
   1555                         nextInLocation += size;
   1556                     } else {
   1557                         variable.getWritableType().getQualifier().layoutLocation = nextOutLocation;
   1558                         nextOutLocation += size;
   1559                     }
   1560                 }
   1561                 trackLinkage(variable);
   1562             }
   1563         }
   1564     };
   1565 
   1566     if (wasFlattened(variable.getUniqueId())) {
   1567         auto& memberList = flattenMap[variable.getUniqueId()].members;
   1568         for (auto member = memberList.begin(); member != memberList.end(); ++member)
   1569             assignLocation(**member);
   1570     } else if (wasSplit(variable.getUniqueId())) {
   1571         TVariable* splitIoVar = getSplitNonIoVar(variable.getUniqueId());
   1572         assignLocation(*splitIoVar);
   1573     } else {
   1574         assignLocation(variable);
   1575     }
   1576 }
   1577 
   1578 //
   1579 // Handle seeing a function declarator in the grammar.  This is the precursor
   1580 // to recognizing a function prototype or function definition.
   1581 //
   1582 void HlslParseContext::handleFunctionDeclarator(const TSourceLoc& loc, TFunction& function, bool prototype)
   1583 {
   1584     //
   1585     // Multiple declarations of the same function name are allowed.
   1586     //
   1587     // If this is a definition, the definition production code will check for redefinitions
   1588     // (we don't know at this point if it's a definition or not).
   1589     //
   1590     bool builtIn;
   1591     TSymbol* symbol = symbolTable.find(function.getMangledName(), &builtIn);
   1592     const TFunction* prevDec = symbol ? symbol->getAsFunction() : 0;
   1593 
   1594     if (prototype) {
   1595         // All built-in functions are defined, even though they don't have a body.
   1596         // Count their prototype as a definition instead.
   1597         if (symbolTable.atBuiltInLevel())
   1598             function.setDefined();
   1599         else {
   1600             if (prevDec && ! builtIn)
   1601                 symbol->getAsFunction()->setPrototyped();  // need a writable one, but like having prevDec as a const
   1602             function.setPrototyped();
   1603         }
   1604     }
   1605 
   1606     // This insert won't actually insert it if it's a duplicate signature, but it will still check for
   1607     // other forms of name collisions.
   1608     if (! symbolTable.insert(function))
   1609         error(loc, "function name is redeclaration of existing name", function.getName().c_str(), "");
   1610 }
   1611 
   1612 // For struct buffers with counters, we must pass the counter buffer as hidden parameter.
   1613 // This adds the hidden parameter to the parameter list in 'paramNodes' if needed.
   1614 // Otherwise, it's a no-op
   1615 void HlslParseContext::addStructBufferHiddenCounterParam(const TSourceLoc& loc, TParameter& param,
   1616                                                          TIntermAggregate*& paramNodes)
   1617 {
   1618     if (! hasStructBuffCounter(*param.type))
   1619         return;
   1620 
   1621     const TString counterBlockName(intermediate.addCounterBufferName(*param.name));
   1622 
   1623     TType counterType;
   1624     counterBufferType(loc, counterType);
   1625     TVariable *variable = makeInternalVariable(counterBlockName, counterType);
   1626 
   1627     if (! symbolTable.insert(*variable))
   1628         error(loc, "redefinition", variable->getName().c_str(), "");
   1629 
   1630     paramNodes = intermediate.growAggregate(paramNodes,
   1631                                             intermediate.addSymbol(*variable, loc),
   1632                                             loc);
   1633 }
   1634 
   1635 //
   1636 // Handle seeing the function prototype in front of a function definition in the grammar.
   1637 // The body is handled after this function returns.
   1638 //
   1639 // Returns an aggregate of parameter-symbol nodes.
   1640 //
   1641 TIntermAggregate* HlslParseContext::handleFunctionDefinition(const TSourceLoc& loc, TFunction& function,
   1642                                                              const TAttributes& attributes,
   1643                                                              TIntermNode*& entryPointTree)
   1644 {
   1645     currentCaller = function.getMangledName();
   1646     TSymbol* symbol = symbolTable.find(function.getMangledName());
   1647     TFunction* prevDec = symbol ? symbol->getAsFunction() : nullptr;
   1648 
   1649     if (prevDec == nullptr)
   1650         error(loc, "can't find function", function.getName().c_str(), "");
   1651     // Note:  'prevDec' could be 'function' if this is the first time we've seen function
   1652     // as it would have just been put in the symbol table.  Otherwise, we're looking up
   1653     // an earlier occurrence.
   1654 
   1655     if (prevDec && prevDec->isDefined()) {
   1656         // Then this function already has a body.
   1657         error(loc, "function already has a body", function.getName().c_str(), "");
   1658     }
   1659     if (prevDec && ! prevDec->isDefined()) {
   1660         prevDec->setDefined();
   1661 
   1662         // Remember the return type for later checking for RETURN statements.
   1663         currentFunctionType = &(prevDec->getType());
   1664     } else
   1665         currentFunctionType = new TType(EbtVoid);
   1666     functionReturnsValue = false;
   1667 
   1668     // Entry points need different I/O and other handling, transform it so the
   1669     // rest of this function doesn't care.
   1670     entryPointTree = transformEntryPoint(loc, function, attributes);
   1671 
   1672     //
   1673     // New symbol table scope for body of function plus its arguments
   1674     //
   1675     pushScope();
   1676 
   1677     //
   1678     // Insert parameters into the symbol table.
   1679     // If the parameter has no name, it's not an error, just don't insert it
   1680     // (could be used for unused args).
   1681     //
   1682     // Also, accumulate the list of parameters into the AST, so lower level code
   1683     // knows where to find parameters.
   1684     //
   1685     TIntermAggregate* paramNodes = new TIntermAggregate;
   1686     for (int i = 0; i < function.getParamCount(); i++) {
   1687         TParameter& param = function[i];
   1688         if (param.name != nullptr) {
   1689             TVariable *variable = new TVariable(param.name, *param.type);
   1690 
   1691             if (i == 0 && function.hasImplicitThis()) {
   1692                 // Anonymous 'this' members are already in a symbol-table level,
   1693                 // and we need to know what function parameter to map them to.
   1694                 symbolTable.makeInternalVariable(*variable);
   1695                 pushImplicitThis(variable);
   1696             }
   1697 
   1698             // Insert the parameters with name in the symbol table.
   1699             if (! symbolTable.insert(*variable))
   1700                 error(loc, "redefinition", variable->getName().c_str(), "");
   1701 
   1702             // Add parameters to the AST list.
   1703             if (shouldFlatten(variable->getType(), variable->getType().getQualifier().storage, true)) {
   1704                 // Expand the AST parameter nodes (but not the name mangling or symbol table view)
   1705                 // for structures that need to be flattened.
   1706                 flatten(*variable, false);
   1707                 const TTypeList* structure = variable->getType().getStruct();
   1708                 for (int mem = 0; mem < (int)structure->size(); ++mem) {
   1709                     paramNodes = intermediate.growAggregate(paramNodes,
   1710                                                             flattenAccess(variable->getUniqueId(), mem,
   1711                                                                           variable->getType().getQualifier().storage,
   1712                                                                           *(*structure)[mem].type),
   1713                                                             loc);
   1714                 }
   1715             } else {
   1716                 // Add the parameter to the AST
   1717                 paramNodes = intermediate.growAggregate(paramNodes,
   1718                                                         intermediate.addSymbol(*variable, loc),
   1719                                                         loc);
   1720             }
   1721 
   1722             // Add hidden AST parameter for struct buffer counters, if needed.
   1723             addStructBufferHiddenCounterParam(loc, param, paramNodes);
   1724         } else
   1725             paramNodes = intermediate.growAggregate(paramNodes, intermediate.addSymbol(*param.type, loc), loc);
   1726     }
   1727     if (function.hasIllegalImplicitThis())
   1728         pushImplicitThis(nullptr);
   1729 
   1730     intermediate.setAggregateOperator(paramNodes, EOpParameters, TType(EbtVoid), loc);
   1731     loopNestingLevel = 0;
   1732     controlFlowNestingLevel = 0;
   1733     postEntryPointReturn = false;
   1734 
   1735     return paramNodes;
   1736 }
   1737 
   1738 // Handle all [attrib] attribute for the shader entry point
   1739 void HlslParseContext::handleEntryPointAttributes(const TSourceLoc& loc, const TAttributes& attributes)
   1740 {
   1741     for (auto it = attributes.begin(); it != attributes.end(); ++it) {
   1742         switch (it->name) {
   1743         case EatNumThreads:
   1744         {
   1745             const TIntermSequence& sequence = it->args->getSequence();
   1746             for (int lid = 0; lid < int(sequence.size()); ++lid)
   1747                 intermediate.setLocalSize(lid, sequence[lid]->getAsConstantUnion()->getConstArray()[0].getIConst());
   1748             break;
   1749         }
   1750         case EatMaxVertexCount:
   1751         {
   1752             int maxVertexCount;
   1753 
   1754             if (! it->getInt(maxVertexCount)) {
   1755                 error(loc, "invalid maxvertexcount", "", "");
   1756             } else {
   1757                 if (! intermediate.setVertices(maxVertexCount))
   1758                     error(loc, "cannot change previously set maxvertexcount attribute", "", "");
   1759             }
   1760             break;
   1761         }
   1762         case EatPatchConstantFunc:
   1763         {
   1764             TString pcfName;
   1765             if (! it->getString(pcfName, 0, false)) {
   1766                 error(loc, "invalid patch constant function", "", "");
   1767             } else {
   1768                 patchConstantFunctionName = pcfName;
   1769             }
   1770             break;
   1771         }
   1772         case EatDomain:
   1773         {
   1774             // Handle [domain("...")]
   1775             TString domainStr;
   1776             if (! it->getString(domainStr)) {
   1777                 error(loc, "invalid domain", "", "");
   1778             } else {
   1779                 TLayoutGeometry domain = ElgNone;
   1780 
   1781                 if (domainStr == "tri") {
   1782                     domain = ElgTriangles;
   1783                 } else if (domainStr == "quad") {
   1784                     domain = ElgQuads;
   1785                 } else if (domainStr == "isoline") {
   1786                     domain = ElgIsolines;
   1787                 } else {
   1788                     error(loc, "unsupported domain type", domainStr.c_str(), "");
   1789                 }
   1790 
   1791                 if (language == EShLangTessEvaluation) {
   1792                     if (! intermediate.setInputPrimitive(domain))
   1793                         error(loc, "cannot change previously set domain", TQualifier::getGeometryString(domain), "");
   1794                 } else {
   1795                     if (! intermediate.setOutputPrimitive(domain))
   1796                         error(loc, "cannot change previously set domain", TQualifier::getGeometryString(domain), "");
   1797                 }
   1798             }
   1799             break;
   1800         }
   1801         case EatOutputTopology:
   1802         {
   1803             // Handle [outputtopology("...")]
   1804             TString topologyStr;
   1805             if (! it->getString(topologyStr)) {
   1806                 error(loc, "invalid outputtopology", "", "");
   1807             } else {
   1808                 TVertexOrder vertexOrder = EvoNone;
   1809                 TLayoutGeometry primitive = ElgNone;
   1810 
   1811                 if (topologyStr == "point") {
   1812                     intermediate.setPointMode();
   1813                 } else if (topologyStr == "line") {
   1814                     primitive = ElgIsolines;
   1815                 } else if (topologyStr == "triangle_cw") {
   1816                     vertexOrder = EvoCw;
   1817                     primitive = ElgTriangles;
   1818                 } else if (topologyStr == "triangle_ccw") {
   1819                     vertexOrder = EvoCcw;
   1820                     primitive = ElgTriangles;
   1821                 } else {
   1822                     error(loc, "unsupported outputtopology type", topologyStr.c_str(), "");
   1823                 }
   1824 
   1825                 if (vertexOrder != EvoNone) {
   1826                     if (! intermediate.setVertexOrder(vertexOrder)) {
   1827                         error(loc, "cannot change previously set outputtopology",
   1828                               TQualifier::getVertexOrderString(vertexOrder), "");
   1829                     }
   1830                 }
   1831                 if (primitive != ElgNone)
   1832                     intermediate.setOutputPrimitive(primitive);
   1833             }
   1834             break;
   1835         }
   1836         case EatPartitioning:
   1837         {
   1838             // Handle [partitioning("...")]
   1839             TString partitionStr;
   1840             if (! it->getString(partitionStr)) {
   1841                 error(loc, "invalid partitioning", "", "");
   1842             } else {
   1843                 TVertexSpacing partitioning = EvsNone;
   1844 
   1845                 if (partitionStr == "integer") {
   1846                     partitioning = EvsEqual;
   1847                 } else if (partitionStr == "fractional_even") {
   1848                     partitioning = EvsFractionalEven;
   1849                 } else if (partitionStr == "fractional_odd") {
   1850                     partitioning = EvsFractionalOdd;
   1851                     //} else if (partition == "pow2") { // TODO: currently nothing to map this to.
   1852                 } else {
   1853                     error(loc, "unsupported partitioning type", partitionStr.c_str(), "");
   1854                 }
   1855 
   1856                 if (! intermediate.setVertexSpacing(partitioning))
   1857                     error(loc, "cannot change previously set partitioning",
   1858                           TQualifier::getVertexSpacingString(partitioning), "");
   1859             }
   1860             break;
   1861         }
   1862         case EatOutputControlPoints:
   1863         {
   1864             // Handle [outputcontrolpoints("...")]
   1865             int ctrlPoints;
   1866             if (! it->getInt(ctrlPoints)) {
   1867                 error(loc, "invalid outputcontrolpoints", "", "");
   1868             } else {
   1869                 if (! intermediate.setVertices(ctrlPoints)) {
   1870                     error(loc, "cannot change previously set outputcontrolpoints attribute", "", "");
   1871                 }
   1872             }
   1873             break;
   1874         }
   1875         case EatBuiltIn:
   1876         case EatLocation:
   1877             // tolerate these because of dual use of entrypoint and type attributes
   1878             break;
   1879         default:
   1880             warn(loc, "attribute does not apply to entry point", "", "");
   1881             break;
   1882         }
   1883     }
   1884 }
   1885 
   1886 // Update the given type with any type-like attribute information in the
   1887 // attributes.
   1888 void HlslParseContext::transferTypeAttributes(const TSourceLoc& loc, const TAttributes& attributes, TType& type,
   1889     bool allowEntry)
   1890 {
   1891     if (attributes.size() == 0)
   1892         return;
   1893 
   1894     int value;
   1895     TString builtInString;
   1896     for (auto it = attributes.begin(); it != attributes.end(); ++it) {
   1897         switch (it->name) {
   1898         case EatLocation:
   1899             // location
   1900             if (it->getInt(value))
   1901                 type.getQualifier().layoutLocation = value;
   1902             break;
   1903         case EatBinding:
   1904             // binding
   1905             if (it->getInt(value)) {
   1906                 type.getQualifier().layoutBinding = value;
   1907                 type.getQualifier().layoutSet = 0;
   1908             }
   1909             // set
   1910             if (it->getInt(value, 1))
   1911                 type.getQualifier().layoutSet = value;
   1912             break;
   1913         case EatGlobalBinding:
   1914             // global cbuffer binding
   1915             if (it->getInt(value))
   1916                 globalUniformBinding = value;
   1917             // global cbuffer binding
   1918             if (it->getInt(value, 1))
   1919                 globalUniformSet = value;
   1920             break;
   1921         case EatInputAttachment:
   1922             // input attachment
   1923             if (it->getInt(value))
   1924                 type.getQualifier().layoutAttachment = value;
   1925             break;
   1926         case EatBuiltIn:
   1927             // PointSize built-in
   1928             if (it->getString(builtInString, 0, false)) {
   1929                 if (builtInString == "PointSize")
   1930                     type.getQualifier().builtIn = EbvPointSize;
   1931             }
   1932             break;
   1933         case EatPushConstant:
   1934             // push_constant
   1935             type.getQualifier().layoutPushConstant = true;
   1936             break;
   1937         case EatConstantId:
   1938             // specialization constant
   1939             if (it->getInt(value)) {
   1940                 TSourceLoc loc;
   1941                 loc.init();
   1942                 setSpecConstantId(loc, type.getQualifier(), value);
   1943             }
   1944             break;
   1945         default:
   1946             if (! allowEntry)
   1947                 warn(loc, "attribute does not apply to a type", "", "");
   1948             break;
   1949         }
   1950     }
   1951 }
   1952 
   1953 //
   1954 // Do all special handling for the entry point, including wrapping
   1955 // the shader's entry point with the official entry point that will call it.
   1956 //
   1957 // The following:
   1958 //
   1959 //    retType shaderEntryPoint(args...) // shader declared entry point
   1960 //    { body }
   1961 //
   1962 // Becomes
   1963 //
   1964 //    out retType ret;
   1965 //    in iargs<that are input>...;
   1966 //    out oargs<that are output> ...;
   1967 //
   1968 //    void shaderEntryPoint()    // synthesized, but official, entry point
   1969 //    {
   1970 //        args<that are input> = iargs...;
   1971 //        ret = @shaderEntryPoint(args...);
   1972 //        oargs = args<that are output>...;
   1973 //    }
   1974 //    retType @shaderEntryPoint(args...)
   1975 //    { body }
   1976 //
   1977 // The symbol table will still map the original entry point name to the
   1978 // modified function and its new name:
   1979 //
   1980 //    symbol table:  shaderEntryPoint  ->   @shaderEntryPoint
   1981 //
   1982 // Returns nullptr if no entry-point tree was built, otherwise, returns
   1983 // a subtree that creates the entry point.
   1984 //
        // Parameters:
        //    loc          - source location attached to the nodes synthesized here
        //    userFunction - the function being defined; when its name matches the entry point
        //                   name held by 'intermediate' it gets an "@" prefix and is wrapped,
        //                   otherwise only its I/O qualifiers are undone (remapNonEntryPointIO)
        //    attributes   - attributes on the function, forwarded to handleEntryPointAttributes()
        //
   1985 TIntermNode* HlslParseContext::transformEntryPoint(const TSourceLoc& loc, TFunction& userFunction,
   1986                                                    const TAttributes& attributes)
   1987 {
   1988     // Return true if this is a tessellation patch constant function input to a domain shader.
   1989     const auto isDsPcfInput = [this](const TType& type) {
   1990         return language == EShLangTessEvaluation &&
   1991         type.contains([](const TType* t) {
   1992                 return t->getQualifier().builtIn == EbvTessLevelOuter ||
   1993                        t->getQualifier().builtIn == EbvTessLevelInner;
   1994             });
   1995     };
   1996 
   1997     // if we aren't in the entry point, fix the IO as such and exit
   1998     if (userFunction.getName().compare(intermediate.getEntryPointName().c_str()) != 0) {
   1999         remapNonEntryPointIO(userFunction);
   2000         return nullptr;
   2001     }
   2002 
   2003     entryPointFunction = &userFunction; // needed in finish()
   2004 
   2005     // Handle entry point attributes
   2006     handleEntryPointAttributes(loc, attributes);
   2007 
   2008     // entry point logic...
   2009 
   2010     // Move parameters and return value to shader in/out
   2011     TVariable* entryPointOutput; // gets created in remapEntryPointIO
   2012     TVector<TVariable*> inputs;
   2013     TVector<TVariable*> outputs;
   2014     remapEntryPointIO(userFunction, entryPointOutput, inputs, outputs);
   2015 
   2016     // Further this return/in/out transform by flattening, splitting, and assigning locations
   2017     const auto makeVariableInOut = [&](TVariable& variable) {
   2018         if (variable.getType().isStruct()) {
   2019             if (variable.getType().getQualifier().isArrayedIo(language)) {
   2020                 if (variable.getType().containsBuiltIn())
   2021                     split(variable);
   2022             } else if (shouldFlatten(variable.getType(), EvqVaryingIn /* not assigned yet, but close enough */, true))
   2023                 flatten(variable, false /* don't track linkage here, it will be tracked in assignToInterface() */);
   2024         }
   2025         // TODO: flatten arrays too
   2026         // TODO: flatten everything in I/O
   2027         // TODO: replace all split with flatten, make all paths can create flattened I/O, then split code can be removed
   2028 
   2029         // For clip and cull distance, multiple output variables potentially get merged
   2030         // into one in assignClipCullDistance.  That code in assignClipCullDistance
   2031         // handles the interface logic, so we avoid it here in that case.
   2032         if (!isClipOrCullDistance(variable.getType()))
   2033             assignToInterface(variable);
   2034     };
            // Order of processing here: return value, then inputs (PCF inputs deferred), then outputs.
   2035     if (entryPointOutput != nullptr)
   2036         makeVariableInOut(*entryPointOutput);
   2037     for (auto it = inputs.begin(); it != inputs.end(); ++it)
   2038         if (!isDsPcfInput((*it)->getType()))  // wait until the end for PCF input (see comment below)
   2039             makeVariableInOut(*(*it));
   2040     for (auto it = outputs.begin(); it != outputs.end(); ++it)
   2041         makeVariableInOut(*(*it));
   2042 
   2043     // In the domain shader, PCF input must be at the end of the linkage.  That's because in the
   2044     // hull shader there is no ordering: the output comes from the separate PCF, which does not
   2045     // participate in the argument list.  That is always put at the end of the HS linkage, so the
   2046     // input side of the DS must match.  The argument may be in any position in the DS argument list
   2047     // however, so this ensures the linkage is built in the correct order regardless of argument order.
   2048     if (language == EShLangTessEvaluation) {
   2049         for (auto it = inputs.begin(); it != inputs.end(); ++it)
   2050             if (isDsPcfInput((*it)->getType()))
   2051                 makeVariableInOut(*(*it));
   2052     }
   2053 
   2054     // Synthesize the call
   2055 
   2056     pushScope(); // matches the one in handleFunctionBody()
   2057 
   2058     // new signature
            // The synthesized entry point takes over the user function's original name, returns void,
            // and has an empty parameter list (synthParams).
   2059     TType voidType(EbtVoid);
   2060     TFunction synthEntryPoint(&userFunction.getName(), voidType);
   2061     TIntermAggregate* synthParams = new TIntermAggregate();
   2062     intermediate.setAggregateOperator(synthParams, EOpParameters, voidType, loc);
   2063     intermediate.setEntryPointMangledName(synthEntryPoint.getMangledName().c_str());
   2064     intermediate.incrementEntryPointCount();
   2065     TFunction callee(&userFunction.getName(), voidType); // call based on old name, which is still in the symbol table
   2066 
   2067     // change original name
   2068     userFunction.addPrefix("@");                         // change the name in the function, but not in the symbol table
   2069 
   2070     // Copy inputs (shader-in -> calling arg), while building up the call node
            // One temporary (argVars[i]) is made per parameter i.  inputIt advances once per input
            // parameter, so it relies on 'inputs' holding one variable per input parameter, in
            // parameter order, which is how remapEntryPointIO() fills it.
   2071     TVector<TVariable*> argVars;
   2072     TIntermAggregate* synthBody = new TIntermAggregate();
   2073     auto inputIt = inputs.begin();
   2074     TIntermTyped* callingArgs = nullptr;
   2075 
   2076     for (int i = 0; i < userFunction.getParamCount(); i++) {
   2077         TParameter& param = userFunction[i];
   2078         argVars.push_back(makeInternalVariable(*param.name, *param.type));
   2079         argVars.back()->getWritableType().getQualifier().makeTemporary();
   2080 
   2081         // Track the input patch, which is the only non-builtin supported by hull shader PCF.
   2082         if (param.getDeclaredBuiltIn() == EbvInputPatch)
   2083             inputPatch = argVars.back();
   2084 
   2085         TIntermSymbol* arg = intermediate.addSymbol(*argVars.back());
   2086         handleFunctionArgument(&callee, callingArgs, arg);
   2087         if (param.type->getQualifier().isParamInput()) {
   2088             intermediate.growAggregate(synthBody, handleAssign(loc, EOpAssign, arg,
   2089                                                                intermediate.addSymbol(**inputIt)));
   2090             inputIt++;
   2091         }
   2092     }
   2093 
   2094     // Call
            // currentCaller names the synthesized entry point while the call is built, and is then
            // set to the (now "@"-prefixed) user function.
   2095     currentCaller = synthEntryPoint.getMangledName();
   2096     TIntermTyped* callReturn = handleFunctionCall(loc, &callee, callingArgs);
   2097     currentCaller = userFunction.getMangledName();
   2098 
   2099     // Return value
   2100     if (entryPointOutput) {
   2101         TIntermTyped* returnAssign;
   2102 
   2103         // For hull shaders, the wrapped entry point return value is written to
   2104         // an array element as indexed by invocation ID, which we might have to make up.
   2105         // This is required to match SPIR-V semantics.
   2106         if (language == EShLangTessControl) {
   2107             TIntermSymbol* invocationIdSym = findTessLinkageSymbol(EbvInvocationId);
   2108 
   2109             // If there is no user declared invocation ID, we must make one.
   2110             if (invocationIdSym == nullptr) {
   2111                 TType invocationIdType(EbtUint, EvqIn, 1);
   2112                 TString* invocationIdName = NewPoolTString("InvocationId");
   2113                 invocationIdType.getQualifier().builtIn = EbvInvocationId;
   2114 
   2115                 TVariable* variable = makeInternalVariable(*invocationIdName, invocationIdType);
   2116 
   2117                 globalQualifierFix(loc, variable->getWritableType().getQualifier());
   2118                 trackLinkage(*variable);
   2119 
   2120                 invocationIdSym = intermediate.addSymbol(*variable);
   2121             }
   2122 
   2123             TIntermTyped* element = intermediate.addIndex(EOpIndexIndirect, intermediate.addSymbol(*entryPointOutput),
   2124                                                           invocationIdSym, loc);
   2125 
   2126             // Set the type of the array element being dereferenced
   2127             const TType derefElementType(entryPointOutput->getType(), 0);
   2128             element->setType(derefElementType);
   2129 
   2130             returnAssign = handleAssign(loc, EOpAssign, element, callReturn);
   2131         } else {
   2132             returnAssign = handleAssign(loc, EOpAssign, intermediate.addSymbol(*entryPointOutput), callReturn);
   2133         }
   2134         intermediate.growAggregate(synthBody, returnAssign);
   2135     } else
                // No output variable for the return value: the call itself becomes the statement.
   2136         intermediate.growAggregate(synthBody, callReturn);
   2137 
   2138     // Output copies
            // outputIt advances once per output parameter, mirroring inputIt above; argVars[i] is the
            // temporary that was passed to the call for parameter i.
   2139     auto outputIt = outputs.begin();
   2140     for (int i = 0; i < userFunction.getParamCount(); i++) {
   2141         TParameter& param = userFunction[i];
   2142 
   2143         // GS outputs are via emit, so we do not copy them here.
   2144         if (param.type->getQualifier().isParamOutput()) {
   2145             if (param.getDeclaredBuiltIn() == EbvGsOutputStream) {
   2146                 // GS output stream does not assign outputs here: it's the Append() method
   2147                 // which writes to the output, probably multiple times separated by Emit.
   2148                 // We merely remember the output to use, here.
   2149                 gsStreamOutput = *outputIt;
   2150             } else {
   2151                 intermediate.growAggregate(synthBody, handleAssign(loc, EOpAssign,
   2152                                                                    intermediate.addSymbol(**outputIt),
   2153                                                                    intermediate.addSymbol(*argVars[i])));
   2154             }
   2155 
   2156             outputIt++;
   2157         }
   2158     }
   2159 
   2160     // Put the pieces together to form a full function subtree
   2161     // for the synthesized entry point.
   2162     synthBody->setOperator(EOpSequence);
   2163     TIntermNode* synthFunctionDef = synthParams;
   2164     handleFunctionBody(loc, synthEntryPoint, synthBody, synthFunctionDef);
   2165 
            // Keep a handle on the synthesized body in the parse context.
   2166     entryPointFunctionBody = synthBody;
   2167 
   2168     return synthFunctionDef;
   2169 }
   2170 
   2171 void HlslParseContext::handleFunctionBody(const TSourceLoc& loc, TFunction& function, TIntermNode* functionBody,
   2172                                           TIntermNode*& node)
   2173 {
   2174     node = intermediate.growAggregate(node, functionBody);
   2175     intermediate.setAggregateOperator(node, EOpFunction, function.getType(), loc);
   2176     node->getAsAggregate()->setName(function.getMangledName().c_str());
   2177 
   2178     popScope();
   2179     if (function.hasImplicitThis())
   2180         popImplicitThis();
   2181 
   2182     if (function.getType().getBasicType() != EbtVoid && ! functionReturnsValue)
   2183         error(loc, "function does not return a value:", "", function.getName().c_str());
   2184 }
   2185 
   2186 // AST I/O is done through shader globals declared in the 'in' or 'out'
   2187 // storage class.  An HLSL entry point has a return value, input parameters
   2188 // and output parameters.  These need to get remapped to the AST I/O.
        //
        //    function    - the entry point; the qualifiers of its return type and parameter types
        //                  are passed through clearUniformInputOutput() as the globals are created
        //    returnValue - set to the new output variable for the return value, or nullptr when
        //                  the function returns void
        //    inputs      - receives one new EvqVaryingIn variable per input parameter, in parameter order
        //    outputs     - receives one new EvqVaryingOut variable per output parameter, in parameter
        //                  order (a parameter that is both input and output gets one of each)
   2189 void HlslParseContext::remapEntryPointIO(TFunction& function, TVariable*& returnValue,
   2190     TVector<TVariable*>& inputs, TVector<TVariable*>& outputs)
   2191 {
   2192     // We might have an input structure type with no decorations that caused it
   2193     // to look like an input type, yet it has (e.g.) interpolation types that
   2194     // must be modified that turn it into an input type.
   2195     // Hence, a missing ioTypeMap for 'input' might need to be synthesized.
   2196     const auto synthesizeEditedInput = [this](TType& type) {
   2197         // True if a type needs to be 'flat'
   2198         const auto needsFlat = [](const TType& type) {
   2199             return type.containsBasicType(EbtInt) ||
   2200                     type.containsBasicType(EbtUint) ||
   2201                     type.containsBasicType(EbtInt64) ||
   2202                     type.containsBasicType(EbtUint64) ||
   2203                     type.containsBasicType(EbtBool) ||
   2204                     type.containsBasicType(EbtDouble);
   2205         };
   2206 
                // Only fragment-stage inputs are edited; everything else is left untouched.
   2207         if (language == EShLangFragment && needsFlat(type)) {
   2208             if (type.isStruct()) {
   2209                 TTypeList* finalList = nullptr;
   2210                 auto it = ioTypeMap.find(type.getStruct());
   2211                 if (it == ioTypeMap.end() || it->second.input == nullptr) {
   2212                     // Getting here means we have no input struct, but we need one.
   2213                     auto list = new TTypeList;
   2214                     for (auto member = type.getStruct()->begin(); member != type.getStruct()->end(); ++member) {
   2215                         TType* newType = new TType;
   2216                         newType->shallowCopy(*member->type);
   2217                         TTypeLoc typeLoc = { newType, member->loc };
   2218                         list->push_back(typeLoc);
   2219                     }
   2220                     // install the new input type
   2221                     if (it == ioTypeMap.end()) {
   2222                         tIoKinds newLists = { list, nullptr, nullptr };
   2223                         ioTypeMap[type.getStruct()] = newLists;
   2224                     } else
   2225                         it->second.input = list;
   2226                     finalList = list;
   2227                 } else
   2228                     finalList = it->second.input;
   2229                 // edit for 'flat'
                        // The edit is applied to the member list registered as the struct's input
                        // list (finalList), not to the members of 'type' itself.
   2230                 for (auto member = finalList->begin(); member != finalList->end(); ++member) {
   2231                     if (needsFlat(*member->type)) {
   2232                         member->type->getQualifier().clearInterpolation();
   2233                         member->type->getQualifier().flat = true;
   2234                     }
   2235                 }
   2236             } else {
   2237                 type.getQualifier().clearInterpolation();
   2238                 type.getQualifier().flat = true;
   2239             }
   2240         }
   2241     };
   2242 
   2243     // Do the actual work to make a type be a shader input or output variable,
   2244     // and clear the original to be non-IO (for use as a normal function parameter/return).
   2245     const auto makeIoVariable = [this](const char* name, TType& type, TStorageQualifier storage) -> TVariable* {
                // The variable is created from 'type' before 'type' has its qualifier cleared;
                // presumably makeInternalVariable() copies the type, so the variable keeps the
                // original I/O qualifiers - TODO confirm against makeInternalVariable().
   2246         TVariable* ioVariable = makeInternalVariable(name, type);
   2247         clearUniformInputOutput(type.getQualifier());
   2248         if (type.isStruct()) {
                    // Use the direction-specific member list recorded in ioTypeMap, when there is one.
   2249             auto newLists = ioTypeMap.find(ioVariable->getType().getStruct());
   2250             if (newLists != ioTypeMap.end()) {
   2251                 if (storage == EvqVaryingIn && newLists->second.input)
   2252                     ioVariable->getWritableType().setStruct(newLists->second.input);
   2253                 else if (storage == EvqVaryingOut && newLists->second.output)
   2254                     ioVariable->getWritableType().setStruct(newLists->second.output);
   2255             }
   2256         }
   2257         if (storage == EvqVaryingIn) {
   2258             correctInput(ioVariable->getWritableType().getQualifier());
                    // Non-array inputs of the tessellation evaluation stage are marked 'patch'.
   2259             if (language == EShLangTessEvaluation)
   2260                 if (!ioVariable->getType().isArray())
   2261                     ioVariable->getWritableType().getQualifier().patch = true;
   2262         } else {
   2263             correctOutput(ioVariable->getWritableType().getQualifier());
   2264         }
   2265         ioVariable->getWritableType().getQualifier().storage = storage;
   2266 
   2267         fixBuiltInIoType(ioVariable->getWritableType());
   2268 
   2269         return ioVariable;
   2270     };
   2271 
   2272     // return value is actually a shader-scoped output (out)
   2273     if (function.getType().getBasicType() == EbtVoid) {
   2274         returnValue = nullptr;
   2275     } else {
   2276         if (language == EShLangTessControl) {
   2277             // tessellation control in HLSL writes a per-ctrl-pt value, but it needs to be an
   2278             // array in SPIR-V semantics.  We'll write to it indexed by invocation ID.
   2279 
                    // NOTE(review): the variable returned by this first call is overwritten a few
                    // lines below.  Its remaining visible effect is that makeIoVariable() clears the
                    // function type's qualifier before outputType copies it - confirm whether that
                    // is intended before removing this call.
   2280             returnValue = makeIoVariable("@entryPointOutput", function.getWritableType(), EvqVaryingOut);
   2281 
   2282             TType outputType;
   2283             outputType.shallowCopy(function.getType());
   2284 
   2285             // vertices has necessarily already been set when handling entry point attributes.
   2286             TArraySizes* arraySizes = new TArraySizes;
   2287             arraySizes->addInnerSize(intermediate.getVertices());
   2288             outputType.transferArraySizes(arraySizes);
   2289 
   2290             clearUniformInputOutput(function.getWritableType().getQualifier());
   2291             returnValue = makeIoVariable("@entryPointOutput", outputType, EvqVaryingOut);
   2292         } else {
   2293             returnValue = makeIoVariable("@entryPointOutput", function.getWritableType(), EvqVaryingOut);
   2294         }
   2295     }
   2296 
   2297     // parameters are actually shader-scoped inputs and outputs (in or out)
            // The two tests below are independent, so a parameter can contribute to both lists.
   2298     for (int i = 0; i < function.getParamCount(); i++) {
   2299         TType& paramType = *function[i].type;
   2300         if (paramType.getQualifier().isParamInput()) {
   2301             synthesizeEditedInput(paramType);
   2302             TVariable* argAsGlobal = makeIoVariable(function[i].name->c_str(), paramType, EvqVaryingIn);
   2303             inputs.push_back(argAsGlobal);
   2304         }
   2305         if (paramType.getQualifier().isParamOutput()) {
   2306             TVariable* argAsGlobal = makeIoVariable(function[i].name->c_str(), paramType, EvqVaryingOut);
   2307             outputs.push_back(argAsGlobal);
   2308         }
   2309     }
   2310 }
   2311 
   2312 // An HLSL function that looks like an entry point, but is not,
   2313 // declares entry point IO built-ins, but these have to be undone.
   2314 void HlslParseContext::remapNonEntryPointIO(TFunction& function)
   2315 {
   2316     // return value
   2317     if (function.getType().getBasicType() != EbtVoid)
   2318         clearUniformInputOutput(function.getWritableType().getQualifier());
   2319 
   2320     // parameters.
   2321     // References to structuredbuffer types are left unmodified
   2322     for (int i = 0; i < function.getParamCount(); i++)
   2323         if (!isReference(*function[i].type))
   2324             clearUniformInputOutput(function[i].type->getQualifier());
   2325 }
   2326 
   2327 // Handle function returns, including type conversions to the function return type
   2328 // if necessary.
   2329 TIntermNode* HlslParseContext::handleReturnValue(const TSourceLoc& loc, TIntermTyped* value)
   2330 {
   2331     functionReturnsValue = true;
   2332 
   2333     if (currentFunctionType->getBasicType() == EbtVoid) {
   2334         error(loc, "void function cannot return a value", "return", "");
   2335         return intermediate.addBranch(EOpReturn, loc);
   2336     } else if (*currentFunctionType != value->getType()) {
   2337         value = intermediate.addConversion(EOpReturn, *currentFunctionType, value);
   2338         if (value && *currentFunctionType != value->getType())
   2339             value = intermediate.addUniShapeConversion(EOpReturn, *currentFunctionType, value);
   2340         if (value == nullptr || *currentFunctionType != value->getType()) {
   2341             error(loc, "type does not match, or is not convertible to, the function's return type", "return", "");
   2342             return value;
   2343         }
   2344     }
   2345 
   2346     return intermediate.addBranch(EOpReturn, value, loc);
   2347 }
   2348 
   2349 void HlslParseContext::handleFunctionArgument(TFunction* function,
   2350                                               TIntermTyped*& arguments, TIntermTyped* newArg)
   2351 {
   2352     TParameter param = { 0, new TType, nullptr };
   2353     param.type->shallowCopy(newArg->getType());
   2354 
   2355     function->addParameter(param);
   2356     if (arguments)
   2357         arguments = intermediate.growAggregate(arguments, newArg);
   2358     else
   2359         arguments = newArg;
   2360 }
   2361 
   2362 // Position may require special handling: we can optionally invert Y.
   2363 // See: https://github.com/KhronosGroup/glslang/issues/1173
   2364 //      https://github.com/KhronosGroup/glslang/issues/494
   2365 TIntermTyped* HlslParseContext::assignPosition(const TSourceLoc& loc, TOperator op,
   2366                                                TIntermTyped* left, TIntermTyped* right)
   2367 {
   2368     // If we are not asked for Y inversion, use a plain old assign.
   2369     if (!intermediate.getInvertY())
   2370         return intermediate.addAssign(op, left, right, loc);
   2371 
   2372     // If we get here, we should invert Y.
   2373     TIntermAggregate* assignList = nullptr;
   2374 
   2375     // If this is a complex rvalue, we don't want to dereference it many times.  Create a temporary.
   2376     TVariable* rhsTempVar = nullptr;
   2377     rhsTempVar = makeInternalVariable("@position", right->getType());
   2378     rhsTempVar->getWritableType().getQualifier().makeTemporary();
   2379 
   2380     {
   2381         TIntermTyped* rhsTempSym = intermediate.addSymbol(*rhsTempVar, loc);
   2382         assignList = intermediate.growAggregate(assignList,
   2383                                                 intermediate.addAssign(EOpAssign, rhsTempSym, right, loc), loc);
   2384     }
   2385 
   2386     // pos.y = -pos.y
   2387     {
   2388         const int Y = 1;
   2389 
   2390         TIntermTyped* tempSymL = intermediate.addSymbol(*rhsTempVar, loc);
   2391         TIntermTyped* tempSymR = intermediate.addSymbol(*rhsTempVar, loc);
   2392         TIntermTyped* index = intermediate.addConstantUnion(Y, loc);
   2393 
   2394         TIntermTyped* lhsElement = intermediate.addIndex(EOpIndexDirect, tempSymL, index, loc);
   2395         TIntermTyped* rhsElement = intermediate.addIndex(EOpIndexDirect, tempSymR, index, loc);
   2396 
   2397         const TType derefType(right->getType(), 0);
   2398 
   2399         lhsElement->setType(derefType);
   2400         rhsElement->setType(derefType);
   2401 
   2402         TIntermTyped* yNeg = intermediate.addUnaryMath(EOpNegative, rhsElement, loc);
   2403 
   2404         assignList = intermediate.growAggregate(assignList, intermediate.addAssign(EOpAssign, lhsElement, yNeg, loc));
   2405     }
   2406 
   2407     // Assign the rhs temp (now with Y inversion) to the final output
   2408     {
   2409         TIntermTyped* rhsTempSym = intermediate.addSymbol(*rhsTempVar, loc);
   2410         assignList = intermediate.growAggregate(assignList, intermediate.addAssign(op, left, rhsTempSym, loc));
   2411     }
   2412 
   2413     assert(assignList != nullptr);
   2414     assignList->setOperator(EOpSequence);
   2415 
   2416     return assignList;
   2417 }
   2418 
   2419 // Clip and cull distance require special handling due to a semantic mismatch.  In HLSL,
   2420 // these can be float scalar, float vector, or arrays of float scalar or float vector.
   2421 // In SPIR-V, they are arrays of scalar floats in all cases.  We must copy individual components
   2422 // (e.g, both x and y components of a float2) out into the destination float array.
   2423 //
   2424 // The values are assigned to sequential members of the output array.  The inner dimension
   2425 // is vector components.  The outer dimension is array elements.
   2426 TIntermAggregate* HlslParseContext::assignClipCullDistance(const TSourceLoc& loc, TOperator op, int semanticId,
   2427                                                            TIntermTyped* left, TIntermTyped* right)
   2428 {
   2429     switch (language) {
   2430     case EShLangFragment:
   2431     case EShLangVertex:
   2432     case EShLangGeometry:
   2433         break;
   2434     default:
   2435         error(loc, "unimplemented: clip/cull not currently implemented for this stage", "", "");
   2436         return nullptr;
   2437     }
   2438 
   2439     TVariable** clipCullVar = nullptr;
   2440 
   2441     // Figure out if we are assigning to, or from, clip or cull distance.
   2442     const bool isOutput = isClipOrCullDistance(left->getType());
   2443 
   2444     // This is the rvalue or lvalue holding the clip or cull distance.
   2445     TIntermTyped* clipCullNode = isOutput ? left : right;
   2446     // This is the value going into or out of the clip or cull distance.
   2447     TIntermTyped* internalNode = isOutput ? right : left;
   2448 
   2449     const TBuiltInVariable builtInType = clipCullNode->getQualifier().builtIn;
   2450 
   2451     decltype(clipSemanticNSizeIn)* semanticNSize = nullptr;
   2452 
   2453     // Refer to either the clip or the cull distance, depending on semantic.
   2454     switch (builtInType) {
   2455     case EbvClipDistance:
   2456         clipCullVar = isOutput ? &clipDistanceOutput : &clipDistanceInput;
   2457         semanticNSize = isOutput ? &clipSemanticNSizeOut : &clipSemanticNSizeIn;
   2458         break;
   2459     case EbvCullDistance:
   2460         clipCullVar = isOutput ? &cullDistanceOutput : &cullDistanceInput;
   2461         semanticNSize = isOutput ? &cullSemanticNSizeOut : &cullSemanticNSizeIn;
   2462         break;
   2463 
   2464     // called invalidly: we expected a clip or a cull distance.
   2465     // static compile time problem: should not happen.
   2466     default: assert(0); return nullptr;
   2467     }
   2468 
   2469     // This is the offset in the destination array of a given semantic's data
   2470     std::array<int, maxClipCullRegs> semanticOffset;
   2471 
   2472     // Calculate offset of variable of semantic N in destination array
   2473     int arrayLoc = 0;
   2474     int vecItems = 0;
   2475 
   2476     for (int x = 0; x < maxClipCullRegs; ++x) {
   2477         // See if we overflowed the vec4 packing
   2478         if ((vecItems + (*semanticNSize)[x]) > 4) {
   2479             arrayLoc = (arrayLoc + 3) & (~0x3); // round up to next multiple of 4
   2480             vecItems = 0;
   2481         }
   2482 
   2483         semanticOffset[x] = arrayLoc;
   2484         vecItems += (*semanticNSize)[x];
   2485         arrayLoc += (*semanticNSize)[x];
   2486     }
   2487 
   2488 
   2489     // It can have up to 2 array dimensions (in the case of geometry shader inputs)
   2490     const TArraySizes* const internalArraySizes = internalNode->getType().getArraySizes();
   2491     const int internalArrayDims = internalNode->getType().isArray() ? internalArraySizes->getNumDims() : 0;
   2492     // vector sizes:
   2493     const int internalVectorSize = internalNode->getType().getVectorSize();
   2494     // array sizes, or 1 if it's not an array:
   2495     const int internalInnerArraySize = (internalArrayDims > 0 ? internalArraySizes->getDimSize(internalArrayDims-1) : 1);
   2496     const int internalOuterArraySize = (internalArrayDims > 1 ? internalArraySizes->getDimSize(0) : 1);
   2497 
   2498     // The created type may be an array of arrays, e.g, for geometry shader inputs.
   2499     const bool isImplicitlyArrayed = (language == EShLangGeometry && !isOutput);
   2500 
   2501     // If we haven't created the output already, create it now.
   2502     if (*clipCullVar == nullptr) {
   2503         // ClipDistance and CullDistance are handled specially in the entry point input/output copy
   2504         // algorithm, because they may need to be unpacked from components of vectors (or a scalar)
   2505         // into a float array, or vice versa.  Here, we make the array the right size and type,
   2506         // which depends on the incoming data, which has several potential dimensions:
   2507         //    * Semantic ID
   2508         //    * vector size
   2509         //    * array size
   2510         // Of those, semantic ID and array size cannot appear simultaneously.
   2511         //
   2512         // Also to note: for implicitly arrayed forms (e.g, geometry shader inputs), we need to create two
   2513         // array dimensions.  The shader's declaration may have one or two array dimensions.  One is always
   2514         // the geometry's dimension.
   2515 
   2516         const bool useInnerSize = internalArrayDims > 1 || !isImplicitlyArrayed;
   2517 
   2518         const int requiredInnerArraySize = arrayLoc * (useInnerSize ? internalInnerArraySize : 1);
   2519         const int requiredOuterArraySize = (internalArrayDims > 0) ? internalArraySizes->getDimSize(0) : 1;
   2520 
   2521         TType clipCullType(EbtFloat, clipCullNode->getType().getQualifier().storage, 1);
   2522         clipCullType.getQualifier() = clipCullNode->getType().getQualifier();
   2523 
   2524         // Create required array dimension
   2525         TArraySizes* arraySizes = new TArraySizes;
   2526         if (isImplicitlyArrayed)
   2527             arraySizes->addInnerSize(requiredOuterArraySize);
   2528         arraySizes->addInnerSize(requiredInnerArraySize);
   2529         clipCullType.transferArraySizes(arraySizes);
   2530 
   2531         // Obtain symbol name: we'll use that for the symbol we introduce.
   2532         TIntermSymbol* sym = clipCullNode->getAsSymbolNode();
   2533         assert(sym != nullptr);
   2534 
   2535         // We are moving the semantic ID from the layout location, so it is no longer needed or
   2536         // desired there.
   2537         clipCullType.getQualifier().layoutLocation = TQualifier::layoutLocationEnd;
   2538 
   2539         // Create variable and track its linkage
   2540         *clipCullVar = makeInternalVariable(sym->getName().c_str(), clipCullType);
   2541 
   2542         trackLinkage(**clipCullVar);
   2543     }
   2544 
   2545     // Create symbol for the clip or cull variable.
   2546     TIntermSymbol* clipCullSym = intermediate.addSymbol(**clipCullVar);
   2547 
   2548     // vector sizes:
   2549     const int clipCullVectorSize = clipCullSym->getType().getVectorSize();
   2550 
   2551     // array sizes, or 1 if it's not an array:
   2552     const TArraySizes* const clipCullArraySizes = clipCullSym->getType().getArraySizes();
   2553     const int clipCullOuterArraySize = isImplicitlyArrayed ? clipCullArraySizes->getDimSize(0) : 1;
   2554     const int clipCullInnerArraySize = clipCullArraySizes->getDimSize(isImplicitlyArrayed ? 1 : 0);
   2555 
   2556     // clipCullSym has got to be an array of scalar floats, per SPIR-V semantics.
   2557     // fixBuiltInIoType() should have handled that upstream.
   2558     assert(clipCullSym->getType().isArray());
   2559     assert(clipCullSym->getType().getVectorSize() == 1);
   2560     assert(clipCullSym->getType().getBasicType() == EbtFloat);
   2561 
   2562     // We may be creating multiple sub-assignments.  This is an aggregate to hold them.
   2563     // TODO: it would be possible to be clever sometimes and avoid the sequence node if not needed.
   2564     TIntermAggregate* assignList = nullptr;
   2565 
   2566     // Holds individual component assignments as we make them.
   2567     TIntermTyped* clipCullAssign = nullptr;
   2568 
   2569     // If the types are homomorphic, use a simple assign.  No need to mess about with
   2570     // individual components.
   2571     if (clipCullSym->getType().isArray() == internalNode->getType().isArray() &&
   2572         clipCullInnerArraySize == internalInnerArraySize &&
   2573         clipCullOuterArraySize == internalOuterArraySize &&
   2574         clipCullVectorSize == internalVectorSize) {
   2575 
   2576         if (isOutput)
   2577             clipCullAssign = intermediate.addAssign(op, clipCullSym, internalNode, loc);
   2578         else
   2579             clipCullAssign = intermediate.addAssign(op, internalNode, clipCullSym, loc);
   2580 
   2581         assignList = intermediate.growAggregate(assignList, clipCullAssign);
   2582         assignList->setOperator(EOpSequence);
   2583 
   2584         return assignList;
   2585     }
   2586 
   2587     // We are going to copy each component of the internal (per array element if indicated) to sequential
   2588     // array elements of the clipCullSym.  This tracks the lhs element we're writing to as we go along.
   2589     // We may be starting in the middle - e.g, for a non-zero semantic ID calculated above.
   2590     int clipCullInnerArrayPos = semanticOffset[semanticId];
   2591     int clipCullOuterArrayPos = 0;
   2592 
   2593     // Lambda to add an index to a node, set the type of the result, and return the new node.
   2594     const auto addIndex = [this, &loc](TIntermTyped* node, int pos) -> TIntermTyped* {
   2595         const TType derefType(node->getType(), 0);
   2596         node = intermediate.addIndex(EOpIndexDirect, node, intermediate.addConstantUnion(pos, loc), loc);
   2597         node->setType(derefType);
   2598         return node;
   2599     };
   2600 
   2601     // Loop through every component of every element of the internal, and copy to or from the matching external.
   2602     for (int internalOuterArrayPos = 0; internalOuterArrayPos < internalOuterArraySize; ++internalOuterArrayPos) {
   2603         for (int internalInnerArrayPos = 0; internalInnerArrayPos < internalInnerArraySize; ++internalInnerArrayPos) {
   2604             for (int internalComponent = 0; internalComponent < internalVectorSize; ++internalComponent) {
   2605                 // clip/cull array member to read from / write to:
   2606                 TIntermTyped* clipCullMember = clipCullSym;
   2607 
   2608                 // If implicitly arrayed, there is an outer array dimension involved
   2609                 if (isImplicitlyArrayed)
   2610                     clipCullMember = addIndex(clipCullMember, clipCullOuterArrayPos);
   2611 
   2612                 // Index into proper array position for clip cull member
   2613                 clipCullMember = addIndex(clipCullMember, clipCullInnerArrayPos++);
   2614 
   2615                 // if needed, start over with next outer array slice.
   2616                 if (isImplicitlyArrayed && clipCullInnerArrayPos >= clipCullInnerArraySize) {
   2617                     clipCullInnerArrayPos = semanticOffset[semanticId];
   2618                     ++clipCullOuterArrayPos;
   2619                 }
   2620 
   2621                 // internal member to read from / write to:
   2622                 TIntermTyped* internalMember = internalNode;
   2623 
   2624                 // If internal node has outer array dimension, index appropriately.
   2625                 if (internalArrayDims > 1)
   2626                     internalMember = addIndex(internalMember, internalOuterArrayPos);
   2627 
   2628                 // If internal node has inner array dimension, index appropriately.
   2629                 if (internalArrayDims > 0)
   2630                     internalMember = addIndex(internalMember, internalInnerArrayPos);
   2631 
   2632                 // If internal node is a vector, extract the component of interest.
   2633                 if (internalNode->getType().isVector())
   2634                     internalMember = addIndex(internalMember, internalComponent);
   2635 
   2636                 // Create an assignment: output from internal to clip cull, or input from clip cull to internal.
   2637                 if (isOutput)
   2638                     clipCullAssign = intermediate.addAssign(op, clipCullMember, internalMember, loc);
   2639                 else
   2640                     clipCullAssign = intermediate.addAssign(op, internalMember, clipCullMember, loc);
   2641 
   2642                 // Track assignment in the sequence.
   2643                 assignList = intermediate.growAggregate(assignList, clipCullAssign);
   2644             }
   2645         }
   2646     }
   2647 
   2648     assert(assignList != nullptr);
   2649     assignList->setOperator(EOpSequence);
   2650 
   2651     return assignList;
   2652 }
   2653 
   2654 // Some simple source assignments need to be flattened to a sequence
   2655 // of AST assignments. Catch these and flatten, otherwise, pass through
   2656 // to intermediate.addAssign().
   2657 //
   2658 // Also, assignment to matrix swizzles requires multiple component assignments,
   2659 // intercept those as well.
        //
        // loc   - source location passed along to the generated nodes.
        // op    - the assignment operator, forwarded unchanged to every generated assign.
        // left  - assignment destination.
        // right - assignment source.
        //
        // Returns nullptr when either operand is null.  A matrix-swizzle destination is handed
        // to handleAssignToMatrixSwizzle().  When neither side was split or flattened the result
        // is a single assignment (or whatever the clip/cull-distance or position helpers return);
        // otherwise it is an EOpSequence aggregate holding the member-wise assignments.
   2660 TIntermTyped* HlslParseContext::handleAssign(const TSourceLoc& loc, TOperator op, TIntermTyped* left,
   2661                                              TIntermTyped* right)
   2662 {
   2663     if (left == nullptr || right == nullptr)
   2664         return nullptr;
   2665 
   2666     // writing to opaques will require fixing transforms
   2667     if (left->getType().containsOpaque())
   2668         intermediate.setNeedsLegalization();
   2669 
   2670     if (left->getAsOperator() && left->getAsOperator()->getOp() == EOpMatrixSwizzle)
   2671         return handleAssignToMatrixSwizzle(loc, op, left, right);
   2672 
   2673     // Return true if the given node is an index operation into a split variable.
   2674     const auto indexesSplit = [this](const TIntermTyped* node) -> bool {
   2675         const TIntermBinary* binaryNode = node->getAsBinaryNode();
   2676 
   2677         if (binaryNode == nullptr)
   2678             return false;
   2679 
   2680         return (binaryNode->getOp() == EOpIndexDirect || binaryNode->getOp() == EOpIndexIndirect) &&
   2681                wasSplit(binaryNode->getLeft());
   2682     };
   2683 
   2684     // Return true if this stage assigns clip position with potentially inverted Y
   2685     const auto assignsClipPos = [this](const TIntermTyped* node) -> bool {
   2686         return node->getType().getQualifier().builtIn == EbvPosition &&
   2687                (language == EShLangVertex || language == EShLangGeometry || language == EShLangTessEvaluation);
   2688     };
   2689 
   2690     const bool isSplitLeft    = wasSplit(left) || indexesSplit(left);
   2691     const bool isSplitRight   = wasSplit(right) || indexesSplit(right);
   2692 
   2693     const bool isFlattenLeft  = wasFlattened(left);
   2694     const bool isFlattenRight = wasFlattened(right);
   2695 
   2696     // OK to do a single assign if neither side is split or flattened.  Otherwise,
   2697     // fall through to a member-wise copy.
   2698     if (!isFlattenLeft && !isFlattenRight && !isSplitLeft && !isSplitRight) {
   2699         // Clip and cull distance requires more processing.  See comment above assignClipCullDistance.
   2700         if (isClipOrCullDistance(left->getType()) || isClipOrCullDistance(right->getType())) {
                    // The clip/cull side determines the direction: if it is the left, this is an output.
   2701             const bool isOutput = isClipOrCullDistance(left->getType());
   2702 
   2703             const int semanticId = (isOutput ? left : right)->getType().getQualifier().layoutLocation;
   2704             return assignClipCullDistance(loc, op, semanticId, left, right);
   2705         } else if (assignsClipPos(left)) {
   2706             // Position can require special handling: see comment above assignPosition
   2707             return assignPosition(loc, op, left, right);
   2708         } else if (left->getQualifier().builtIn == EbvSampleMask) {
   2709             // Certain builtins are required to be arrayed outputs in SPIR-V, but may internally be scalars
   2710             // in the shader.  Copy the scalar RHS into the LHS array element zero, if that happens.
   2711             if (left->isArray() && !right->isArray()) {
   2712                 const TType derefType(left->getType(), 0);
   2713                 left = intermediate.addIndex(EOpIndexDirect, left, intermediate.addConstantUnion(0, loc), loc);
   2714                 left->setType(derefType);
   2715                 // Fall through to add assign.
   2716             }
   2717         }
   2718 
   2719         return intermediate.addAssign(op, left, right, loc);
   2720     }
   2721 
            // From here on, at least one side was split or flattened: build a sequence of assignments.
   2722     TIntermAggregate* assignList = nullptr;
   2723     const TVector<TVariable*>* leftVariables = nullptr;
   2724     const TVector<TVariable*>* rightVariables = nullptr;
   2725 
   2726     // A temporary to store the right node's value, so we don't keep indirecting into it
   2727     // if it's not a simple symbol.
   2728     TVariable* rhsTempVar = nullptr;
   2729 
   2730     // If the RHS is a simple symbol node, we'll copy it for each member.
   2731     TIntermSymbol* cloneSymNode = nullptr;
   2732 
   2733     int memberCount = 0;
   2734 
   2735     // Track how many items there are to copy.
            // These are two independent tests, so for a type that is both a struct and an array
            // the array count, assigned second, is the one that is kept.
   2736     if (left->getType().isStruct())
   2737         memberCount = (int)left->getType().getStruct()->size();
   2738     if (left->getType().isArray())
   2739         memberCount = left->getType().getCumulativeArraySize();
   2740 
            // NOTE(review): assumes a flattened node is a symbol with an entry in flattenMap; neither
            // getAsSymbolNode() nor find() is checked here or in the right-hand case below -- TODO confirm.
   2741     if (isFlattenLeft)
   2742         leftVariables = &flattenMap.find(left->getAsSymbolNode()->getId())->second.members;
   2743 
   2744     if (isFlattenRight) {
   2745         rightVariables = &flattenMap.find(right->getAsSymbolNode()->getId())->second.members;
   2746     } else {
   2747         // The RHS is not flattened.  There are several cases:
   2748         // 1. 1 item to copy:  Use the RHS directly.
   2749         // 2. >1 item, simple symbol RHS: we'll create a new TIntermSymbol node for each, but no assign to temp.
   2750         // 3. >1 item, complex RHS: assign it to a new temp variable, and create a TIntermSymbol for each member.
   2751 
   2752         if (memberCount <= 1) {
   2753             // case 1: we'll use the symbol directly below.  Nothing to do.
   2754         } else {
   2755             if (right->getAsSymbolNode() != nullptr) {
   2756                 // case 2: we'll copy the symbol per iteration below.
   2757                 cloneSymNode = right->getAsSymbolNode();
   2758             } else {
   2759                 // case 3: assign to a temp, and indirect into that.
   2760                 rhsTempVar = makeInternalVariable("flattenTemp", right->getType());
   2761                 rhsTempVar->getWritableType().getQualifier().makeTemporary();
   2762                 TIntermTyped* noFlattenRHS = intermediate.addSymbol(*rhsTempVar, loc);
   2763 
   2764                 // Add this to the aggregate being built.
   2765                 assignList = intermediate.growAggregate(assignList,
   2766                                                         intermediate.addAssign(op, noFlattenRHS, right, loc), loc);
   2767             }
   2768         }
   2769     }
   2770 
   2771     // When dealing with split arrayed structures of built-ins, the arrayness is moved to the extracted built-in
   2772     // variables, which is awkward when copying between split and unsplit structures.  This variable tracks
   2773     // array indirections so they can be percolated from outer structs to inner variables.
   2774     std::vector <int> arrayElement;
   2775 
   2776     TStorageQualifier leftStorage = left->getType().getQualifier().storage;
   2777     TStorageQualifier rightStorage = right->getType().getQualifier().storage;
   2778 
            // Running indices into leftVariables / rightVariables; getMember() advances them.
   2779     int leftOffset = findSubtreeOffset(*left);
   2780     int rightOffset = findSubtreeOffset(*right);
   2781 
            // Build the node for member/element 'member' of one side of the assignment.
            //   isLeft      - selects the left or right side's state (storage, split flag, flattened variables).
            //   type        - the aggregate type being dereferenced.
            //   member      - index used to compute the dereferenced type.
            //   splitNode   - node that is indexed with splitMember when neither a split built-in variable
            //                 nor a flattened variable is used; returned as-is if 'type' is not an aggregate.
            //   flattened   - whether the caller decided this subset should be flattened.
   2782     const auto getMember = [&](bool isLeft, const TType& type, int member, TIntermTyped* splitNode, int splitMember,
   2783                                bool flattened)
   2784                            -> TIntermTyped * {
   2785         const bool split     = isLeft ? isSplitLeft   : isSplitRight;
   2786 
   2787         TIntermTyped* subTree;
   2788         const TType derefType(type, member);
   2789         const TVariable* builtInVar = nullptr;
                // A built-in member of a split/flattened aggregate may live in its own variable,
                // looked up by (built-in, storage class).
   2790         if ((flattened || split) && derefType.isBuiltIn()) {
   2791             auto splitPair = splitBuiltIns.find(HlslParseContext::tInterstageIoData(
   2792                                                    derefType.getQualifier().builtIn,
   2793                                                    isLeft ? leftStorage : rightStorage));
   2794             if (splitPair != splitBuiltIns.end())
   2795                 builtInVar = splitPair->second;
   2796         }
   2797         if (builtInVar != nullptr) {
   2798             // copy from interstage IO built-in if needed
   2799             subTree = intermediate.addSymbol(*builtInVar);
   2800 
   2801             if (subTree->getType().isArray()) {
   2802                 // Arrayness of builtIn symbols isn't handled by the normal recursion:
   2803                 // it's been extracted and moved to the built-in.
   2804                 if (!arrayElement.empty()) {
   2805                     const TType splitDerefType(subTree->getType(), arrayElement.back());
   2806                     subTree = intermediate.addIndex(EOpIndexDirect, subTree,
   2807                                                     intermediate.addConstantUnion(arrayElement.back(), loc), loc);
   2808                     subTree->setType(splitDerefType);
   2809                 } else if (splitNode->getAsOperator() != nullptr && (splitNode->getAsOperator()->getOp() == EOpIndexIndirect)) {
   2810                     // This might also be a stage with arrayed outputs, in which case there's an index
   2811                     // operation we should transfer to the output builtin.
   2812 
   2813                     const TType splitDerefType(subTree->getType(), 0);
   2814                     subTree = intermediate.addIndex(splitNode->getAsOperator()->getOp(), subTree,
   2815                                                     splitNode->getAsBinaryNode()->getRight(), loc);
   2816                     subTree->setType(splitDerefType);
   2817                 }
   2818             }
   2819         } else if (flattened && !shouldFlatten(derefType, isLeft ? leftStorage : rightStorage, false)) {
                    // Flattened member that needs no further flattening: take the next variable in
                    // sequence.  The offset advances on every such call, so the order in which
                    // getMember() is invoked matters.
   2820             if (isLeft)
   2821                 subTree = intermediate.addSymbol(*(*leftVariables)[leftOffset++]);
   2822             else
   2823                 subTree = intermediate.addSymbol(*(*rightVariables)[rightOffset++]);
   2824         } else {
   2825             // Index operator if it's an aggregate, else EOpNull
   2826             const TOperator accessOp = type.isArray()  ? EOpIndexDirect
   2827                                      : type.isStruct() ? EOpIndexDirectStruct
   2828                                      : EOpNull;
   2829             if (accessOp == EOpNull) {
   2830                 subTree = splitNode;
   2831             } else {
   2832                 subTree = intermediate.addIndex(accessOp, splitNode, intermediate.addConstantUnion(splitMember, loc),
   2833                                                 loc);
   2834                 const TType splitDerefType(splitNode->getType(), splitMember);
   2835                 subTree->setType(splitDerefType);
   2836             }
   2837         }
   2838 
   2839         return subTree;
   2840     };
   2841 
   2842     // Use the proper RHS node: a new symbol from a TVariable, copy
   2843     // of an TIntermSymbol node, or sometimes the right node directly.
   2844     right = rhsTempVar != nullptr   ? intermediate.addSymbol(*rhsTempVar, loc) :
   2845             cloneSymNode != nullptr ? intermediate.addSymbol(*cloneSymNode) :
   2846             right;
   2847 
   2848     // Cannot use auto here, because this is recursive, and auto can't work out the type without seeing the
   2849     // whole thing.  So, we'll resort to an explicit type via std::function.
            // Note that the lambda's 'left'/'right' parameters shadow the function's own; splitLeft/splitRight
            // are the corresponding nodes of the split structures, walked in parallel.  Results are
            // appended to assignList.
   2850     const std::function<void(TIntermTyped* left, TIntermTyped* right, TIntermTyped* splitLeft, TIntermTyped* splitRight,
   2851                              bool topLevel)>
   2852     traverse = [&](TIntermTyped* left, TIntermTyped* right, TIntermTyped* splitLeft, TIntermTyped* splitRight,
   2853                    bool topLevel) -> void {
   2854         // If we get here, we are assigning to or from a whole array or struct that must be
   2855         // flattened, so have to do member-by-member assignment:
   2856 
   2857         bool shouldFlattenSubsetLeft = isFlattenLeft && shouldFlatten(left->getType(), leftStorage, topLevel);
   2858         bool shouldFlattenSubsetRight = isFlattenRight && shouldFlatten(right->getType(), rightStorage, topLevel);
   2859 
   2860         if ((left->getType().isArray() || right->getType().isArray()) &&
   2861               (shouldFlattenSubsetLeft  || isSplitLeft ||
   2862                shouldFlattenSubsetRight || isSplitRight)) {
   2863             const int elementsL = left->getType().isArray()  ? left->getType().getOuterArraySize()  : 1;
   2864             const int elementsR = right->getType().isArray() ? right->getType().getOuterArraySize() : 1;
   2865 
   2866             // The arrays might not be the same size,
   2867             // e.g., if the size has been forced for EbvTessLevelInner/Outer.
   2868             const int elementsToCopy = std::min(elementsL, elementsR);
   2869 
   2870             // array case
   2871             for (int element = 0; element < elementsToCopy; ++element) {
                        // Record the current element so getMember() can index extracted built-in arrays;
                        // popped again after the recursion for this element.
   2872                 arrayElement.push_back(element);
   2873 
   2874                 // Add a new AST symbol node if we have a temp variable holding a complex RHS.
   2875                 TIntermTyped* subLeft  = getMember(true,  left->getType(),  element, left, element,
   2876                                                    shouldFlattenSubsetLeft);
   2877                 TIntermTyped* subRight = getMember(false, right->getType(), element, right, element,
   2878                                                    shouldFlattenSubsetRight);
   2879 
   2880                 TIntermTyped* subSplitLeft =  isSplitLeft  ? getMember(true,  left->getType(),  element, splitLeft,
   2881                                                                        element, shouldFlattenSubsetLeft)
   2882                                                            : subLeft;
   2883                 TIntermTyped* subSplitRight = isSplitRight ? getMember(false, right->getType(), element, splitRight,
   2884                                                                        element, shouldFlattenSubsetRight)
   2885                                                            : subRight;
   2886 
   2887                 traverse(subLeft, subRight, subSplitLeft, subSplitRight, false);
   2888 
   2889                 arrayElement.pop_back();
   2890             }
   2891         } else if (left->getType().isStruct() && (shouldFlattenSubsetLeft  || isSplitLeft ||
   2892                                                   shouldFlattenSubsetRight || isSplitRight)) {
   2893             // struct case
                    // NOTE(review): only 'left' is tested with isStruct(); 'right' is assumed to be a struct
                    // whose member list lines up with the left's (membersR is indexed with the left's
                    // member index below) -- TODO confirm.
   2894             const auto& membersL = *left->getType().getStruct();
   2895             const auto& membersR = *right->getType().getStruct();
   2896 
   2897             // These track the members in the split structures corresponding to the same in the unsplit structures,
   2898             // which we traverse in parallel.
   2899             int memberL = 0;
   2900             int memberR = 0;
   2901 
   2902             // Handle empty structure assignment
   2903             if (int(membersL.size()) == 0 && int(membersR.size()) == 0)
   2904                 assignList = intermediate.growAggregate(assignList, intermediate.addAssign(op, left, right, loc), loc);
   2905 
   2906             for (int member = 0; member < int(membersL.size()); ++member) {
   2907                 const TType& typeL = *membersL[member].type;
   2908                 const TType& typeR = *membersR[member].type;
   2909 
   2910                 TIntermTyped* subLeft  = getMember(true,  left->getType(), member, left, member,
   2911                                                    shouldFlattenSubsetLeft);
   2912                 TIntermTyped* subRight = getMember(false, right->getType(), member, right, member,
   2913                                                    shouldFlattenSubsetRight);
   2914 
   2915                 // If there is no splitting, use the same values to avoid inefficiency.
   2916                 TIntermTyped* subSplitLeft =  isSplitLeft  ? getMember(true,  left->getType(),  member, splitLeft,
   2917                                                                        memberL, shouldFlattenSubsetLeft)
   2918                                                            : subLeft;
   2919                 TIntermTyped* subSplitRight = isSplitRight ? getMember(false, right->getType(), member, splitRight,
   2920                                                                        memberR, shouldFlattenSubsetRight)
   2921                                                            : subRight;
   2922 
   2923                 if (isClipOrCullDistance(subSplitLeft->getType()) || isClipOrCullDistance(subSplitRight->getType())) {
   2924                     // Clip and cull distance built-in assignment is complex in its own right, and is handled in
   2925                     // a separate function dedicated to that task.  See comment above assignClipCullDistance;
   2926 
   2927                     const bool isOutput = isClipOrCullDistance(subSplitLeft->getType());
   2928 
   2929                     // Since all clip/cull semantics boil down to the same built-in type, we need to get the
   2930                     // semantic ID from the dereferenced type's layout location, to avoid an N-1 mapping.
   2931                     const TType derefType((isOutput ? left : right)->getType(), member);
   2932                     const int semanticId = derefType.getQualifier().layoutLocation;
   2933 
   2934                     TIntermAggregate* clipCullAssign = assignClipCullDistance(loc, op, semanticId,
   2935                                                                               subSplitLeft, subSplitRight);
   2936 
   2937                     assignList = intermediate.growAggregate(assignList, clipCullAssign, loc);
   2938                 } else if (assignsClipPos(subSplitLeft)) {
   2939                     // Position can require special handling: see comment above assignPosition
   2940                     TIntermTyped* positionAssign = assignPosition(loc, op, subSplitLeft, subSplitRight);
   2941                     assignList = intermediate.growAggregate(assignList, positionAssign, loc);
   2942                 } else if (!shouldFlattenSubsetLeft && !shouldFlattenSubsetRight &&
   2943                            !typeL.containsBuiltIn() && !typeR.containsBuiltIn()) {
   2944                     // If this is the final flattening (no nested types below to flatten)
   2945                     // we'll copy the member, else recurse into the type hierarchy.
   2946                     // However, if splitting the struct, that means we can copy a whole
   2947                     // subtree here IFF it does not itself contain any interstage built-in
   2948                     // IO variables, so we only have to recurse into it if there's something
   2949                     // for splitting to do.  That can save a lot of AST verbosity for
   2950                     // a bunch of memberwise copies.
   2951 
   2952                     assignList = intermediate.growAggregate(assignList,
   2953                                                             intermediate.addAssign(op, subSplitLeft, subSplitRight, loc),
   2954                                                             loc);
   2955                 } else {
   2956                     traverse(subLeft, subRight, subSplitLeft, subSplitRight, false);
   2957                 }
   2958 
                        // Built-in members do not advance the split-side member index.
   2959                 memberL += (typeL.isBuiltIn() ? 0 : 1);
   2960                 memberR += (typeR.isBuiltIn() ? 0 : 1);
   2961             }
   2962         } else {
   2963             // Member copy
   2964             assignList = intermediate.growAggregate(assignList, intermediate.addAssign(op, left, right, loc), loc);
   2965         }
   2966 
   2967     };
   2968 
   2969     TIntermTyped* splitLeft  = left;
   2970     TIntermTyped* splitRight = right;
   2971 
   2972     // If either left or right was a split structure, we must read or write it, but still have to
   2973     // parallel-recurse through the unsplit structure to identify the built-in IO vars.
   2974     // The left can be either a symbol, or an index into a symbol (e.g, array reference)
   2975     if (isSplitLeft) {
   2976         if (indexesSplit(left)) {
   2977             // Index case: Refer to the indexed symbol, if the left is an index operator.
   2978             const TIntermSymbol* symNode = left->getAsBinaryNode()->getLeft()->getAsSymbolNode();
   2979 
   2980             TIntermTyped* splitLeftNonIo = intermediate.addSymbol(*getSplitNonIoVar(symNode->getId()), loc);
   2981 
   2982             splitLeft = intermediate.addIndex(left->getAsBinaryNode()->getOp(), splitLeftNonIo,
   2983                                               left->getAsBinaryNode()->getRight(), loc);
   2984 
   2985             const TType derefType(splitLeftNonIo->getType(), 0);
   2986             splitLeft->setType(derefType);
   2987         } else {
   2988             // Symbol case: otherwise, if not indexed, we have the symbol directly.
   2989             const TIntermSymbol* symNode = left->getAsSymbolNode();
   2990             splitLeft = intermediate.addSymbol(*getSplitNonIoVar(symNode->getId()), loc);
   2991         }
   2992     }
   2993 
            // NOTE(review): isSplitRight can also be set by indexesSplit(right), but only the symbol form
            // is handled here and getAsSymbolNode() is not null-checked; 'right' may also have been
            // replaced by a temp/cloned symbol above -- TODO confirm this is safe for all callers.
   2994     if (isSplitRight)
   2995         splitRight = intermediate.addSymbol(*getSplitNonIoVar(right->getAsSymbolNode()->getId()), loc);
   2996 
   2997     // This makes the whole assignment, recursing through subtypes as needed.
   2998     traverse(left, right, splitLeft, splitRight, true);
   2999 
   3000     assert(assignList != nullptr);
   3001     assignList->setOperator(EOpSequence);
   3002 
   3003     return assignList;
   3004 }
   3005 
   3006 // Assigning to a matrix swizzle cannot be expressed as one AST assignment.
   3007 // The RHS is read one component at a time and each value is stored into the
   3008 // matrix element addressed by the corresponding pair of swizzle selectors.
   3009 TIntermTyped* HlslParseContext::handleAssignToMatrixSwizzle(const TSourceLoc& loc, TOperator op, TIntermTyped* left,
   3010                                                             TIntermTyped* right)
   3011 {
   3012     assert(left->getAsOperator() && left->getAsOperator()->getOp() == EOpMatrixSwizzle);
   3013 
   3014     // Anything but plain assignment is diagnosed; processing still continues below.
   3015     if (op != EOpAssign)
   3016         error(loc, "only simple assignment to non-simple matrix swizzle is supported", "assign", "");
   3017 
   3018     // Pull the swizzle apart: its left child is the matrix, its right child the selector list.
   3019     TIntermBinary* swizzleNode = left->getAsBinaryNode();
   3020     TIntermTyped* target = swizzleNode->getLeft()->getAsTyped();
   3021     const TIntermSequence& selectors = swizzleNode->getRight()->getAsAggregate()->getSequence();
   3022     const int selectorCount = (int)selectors.size();
   3023 
   3024     // A symbol RHS is indexed directly; anything else is first stored into a temporary vector.
   3025     TIntermTyped* spill = nullptr;
   3026     TIntermSymbol* source = right->getAsSymbolNode();
   3027     if (source == nullptr) {
   3028         TType tempType(target->getBasicType(), EvqTemporary, target->getQualifier().precision, selectorCount/2);
   3029         source = intermediate.addSymbol(*makeInternalVariable("intermVec", tempType), loc);
   3030         spill = handleAssign(loc, op, source, right);
   3031     }
   3032 
   3033     // Types for the two-step matrix dereference and for the constant indices.
   3034     TType columnType(target->getType(), 0);
   3035     TType componentType(columnType, 0);
   3036     TType indexType(EbtInt);
   3037 
   3038     // Wraps the constant held by selector 'pos' in a new constant node of indexType.
   3039     const auto selectorConstant = [&](int pos) -> TIntermTyped* {
   3040         return intermediate.addConstantUnion(selectors[pos]->getAsConstantUnion()->getConstArray(), indexType, loc);
   3041     };
   3042 
   3043     // Everything is gathered into one aggregate so a single node represents the whole operation.
   3044     TIntermAggregate* sequence = intermediate.makeAggregate(spill);
   3045     for (int sel = 0, comp = 0; sel < selectorCount; sel += 2, ++comp) {
   3046         // Source: component 'comp' of the RHS vector.
   3047         // NOTE(review): no setType() on this index node, unlike the destination -- confirm intended.
   3048         TIntermTyped* src = intermediate.addIndex(EOpIndexDirect, source, intermediate.addConstantUnion(comp, loc), loc);
   3049 
   3050         // Destination: the first selector of the pair indexes the matrix ...
   3051         TIntermTyped* dst = intermediate.addIndex(EOpIndexDirect, target, selectorConstant(sel), loc);
   3052         dst->setType(columnType);
   3053 
   3054         // ... and the second selector indexes the result of that.
   3055         dst = intermediate.addIndex(EOpIndexDirect, dst, selectorConstant(sel + 1), loc);
   3056         dst->setType(componentType);
   3057 
   3058         sequence = intermediate.growAggregate(sequence, intermediate.addAssign(op, dst, src, loc));
   3059     }
   3060 
   3061     sequence->setOp(EOpSequence);
   3062 
   3063     return sequence;
   3064 }
   3065 
   3066 // HLSL Interlocked* intrinsics take slightly different arguments than GLSL/AST/SPIRV
   3067 // atomics; decomposeIntrinsic converts them.  This returns the opcode to use after
   3068 // that conversion: the image form when isImage is true, the plain form otherwise.
   3069 TOperator HlslParseContext::mapAtomicOp(const TSourceLoc& loc, TOperator op, bool isImage)
   3070 {
   3071     static const struct { TOperator hlsl, image, plain; } atomicOps[] = {
   3072         { EOpInterlockedAdd,             EOpImageAtomicAdd,      EOpAtomicAdd      },
   3073         { EOpInterlockedAnd,             EOpImageAtomicAnd,      EOpAtomicAnd      },
   3074         { EOpInterlockedCompareExchange, EOpImageAtomicCompSwap, EOpAtomicCompSwap },
   3075         { EOpInterlockedMax,             EOpImageAtomicMax,      EOpAtomicMax      },
   3076         { EOpInterlockedMin,             EOpImageAtomicMin,      EOpAtomicMin      },
   3077         { EOpInterlockedOr,              EOpImageAtomicOr,       EOpAtomicOr       },
   3078         { EOpInterlockedXor,             EOpImageAtomicXor,      EOpAtomicXor      },
   3079         { EOpInterlockedExchange,        EOpImageAtomicExchange, EOpAtomicExchange },
   3080     };
   3081     for (const auto& entry : atomicOps)
   3082         if (entry.hlsl == op)
   3083             return isImage ? entry.image : entry.plain;
   3084     // No mapping (this includes EOpInterlockedCompareStore, still a TODO): report it.
   3085     error(loc, "unknown atomic operation", "unknown op", "");
   3086     return EOpNull;
   3087 }
   3088 
   3089 //
   3090 // Create a combined sampler/texture from separate sampler and texture.
   3091 //
   3092 TIntermAggregate* HlslParseContext::handleSamplerTextureCombine(const TSourceLoc& loc, TIntermTyped* argTex,
   3093                                                                 TIntermTyped* argSampler)
   3094 {
   3095     TIntermAggregate* txcombine = new TIntermAggregate(EOpConstructTextureSampler);
   3096 
   3097     txcombine->getSequence().push_back(argTex);
   3098     txcombine->getSequence().push_back(argSampler);
   3099 
   3100     TSampler samplerType = argTex->getType().getSampler();
   3101     samplerType.combined = true;
   3102 
   3103     // TODO:
   3104     // This block exists until the spec no longer requires shadow modes on texture objects.
   3105     // It can be deleted after that, along with the shadowTextureVariant member.
   3106     {
   3107         const bool shadowMode = argSampler->getType().getSampler().shadow;
   3108 
   3109         TIntermSymbol* texSymbol = argTex->getAsSymbolNode();
   3110 
   3111         if (texSymbol == nullptr)
   3112             texSymbol = argTex->getAsBinaryNode()->getLeft()->getAsSymbolNode();
   3113 
   3114         if (texSymbol == nullptr) {
   3115             error(loc, "unable to find texture symbol", "", "");
   3116             return nullptr;
   3117         }
   3118 
   3119         // This forces the texture's shadow state to be the sampler's
   3120         // shadow state.  This depends on downstream optimization to
   3121         // DCE one variant in [shadow, nonshadow] if both are present,
   3122         // or the SPIR-V module would be invalid.
   3123         int newId = texSymbol->getId();
   3124 
   3125         // Check to see if this texture has been given a shadow mode already.
   3126         // If so, look up the one we already have.
   3127         const auto textureShadowEntry = textureShadowVariant.find(texSymbol->getId());
   3128 
   3129         if (textureShadowEntry != textureShadowVariant.end())
   3130             newId = textureShadowEntry->second->get(shadowMode);
   3131         else
   3132             textureShadowVariant[texSymbol->getId()] = new tShadowTextureSymbols;
   3133 
   3134         // Sometimes we have to create another symbol (if this texture has been seen before,
   3135         // and we haven't created the form for this shadow mode).
   3136         if (newId == -1) {
   3137             TType texType;
   3138             texType.shallowCopy(argTex->getType());
   3139             texType.getSampler().shadow = shadowMode;  // set appropriate shadow mode.
   3140             globalQualifierFix(loc, texType.getQualifier());
   3141 
   3142             TVariable* newTexture = makeInternalVariable(texSymbol->getName(), texType);
   3143 
   3144             trackLinkage(*newTexture);
   3145 
   3146             newId = newTexture->getUniqueId();
   3147         }
   3148 
   3149         assert(newId != -1);
   3150 
   3151         if (textureShadowVariant.find(newId) == textureShadowVariant.end())
   3152             textureShadowVariant[newId] = textureShadowVariant[texSymbol->getId()];
   3153 
   3154         textureShadowVariant[newId]->set(shadowMode, newId);
   3155 
   3156         // Remember this shadow mode in the texture and the merged type.
   3157         argTex->getWritableType().getSampler().shadow = shadowMode;
   3158         samplerType.shadow = shadowMode;
   3159 
   3160         texSymbol->switchId(newId);
   3161     }
   3162 
   3163     txcombine->setType(TType(samplerType, EvqTemporary));
   3164     txcombine->setLoc(loc);
   3165 
   3166     return txcombine;
   3167 }
   3168 
   3169 // Return true if this a buffer type that has an associated counter buffer.
   3170 bool HlslParseContext::hasStructBuffCounter(const TType& type) const
   3171 {
   3172     switch (type.getQualifier().declaredBuiltIn) {
   3173     case EbvAppendConsume:       // fall through...
   3174     case EbvRWStructuredBuffer:  // ...
   3175         return true;
   3176     default:
   3177         return false; // the other structuredbuffer types do not have a counter.
   3178     }
   3179 }
   3180 
   3181 void HlslParseContext::counterBufferType(const TSourceLoc& loc, TType& type)
   3182 {
   3183     // Counter type
   3184     TType* counterType = new TType(EbtUint, EvqBuffer);
   3185     counterType->setFieldName(intermediate.implicitCounterName);
   3186 
   3187     TTypeList* blockStruct = new TTypeList;
   3188     TTypeLoc  member = { counterType, loc };
   3189     blockStruct->push_back(member);
   3190 
   3191     TType blockType(blockStruct, "", counterType->getQualifier());
   3192     blockType.getQualifier().storage = EvqBuffer;
   3193 
   3194     type.shallowCopy(blockType);
   3195     shareStructBufferType(type);
   3196 }
   3197 
   3198 // declare counter for a structured buffer type
   3199 void HlslParseContext::declareStructBufferCounter(const TSourceLoc& loc, const TType& bufferType, const TString& name)
   3200 {
   3201     // Bail out if not a struct buffer
   3202     if (! isStructBufferType(bufferType))
   3203         return;
   3204 
   3205     if (! hasStructBuffCounter(bufferType))
   3206         return;
   3207 
   3208     TType blockType;
   3209     counterBufferType(loc, blockType);
   3210 
   3211     TString* blockName = new TString(intermediate.addCounterBufferName(name));
   3212 
   3213     // Counter buffer is not yet in use
   3214     structBufferCounter[*blockName] = false;
   3215 
   3216     shareStructBufferType(blockType);
   3217     declareBlock(loc, blockType, blockName);
   3218 }
   3219 
   3220 // return the counter that goes with a given structuredbuffer
   3221 TIntermTyped* HlslParseContext::getStructBufferCounter(const TSourceLoc& loc, TIntermTyped* buffer)
   3222 {
   3223     // Bail out if not a struct buffer
   3224     if (buffer == nullptr || ! isStructBufferType(buffer->getType()))
   3225         return nullptr;
   3226 
   3227     const TString counterBlockName(intermediate.addCounterBufferName(buffer->getAsSymbolNode()->getName()));
   3228 
   3229     // Mark the counter as being used
   3230     structBufferCounter[counterBlockName] = true;
   3231 
   3232     TIntermTyped* counterVar = handleVariable(loc, &counterBlockName);  // find the block structure
   3233     TIntermTyped* index = intermediate.addConstantUnion(0, loc); // index to counter inside block struct
   3234 
   3235     TIntermTyped* counterMember = intermediate.addIndex(EOpIndexDirectStruct, counterVar, index, loc);
   3236     counterMember->setType(TType(EbtUint));
   3237     return counterMember;
   3238 }
   3239 
   3240 //
   3241 // Decompose structure buffer methods into AST
   3242 //
   3243 void HlslParseContext::decomposeStructBufferMethods(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
   3244 {
   3245     if (node == nullptr || node->getAsOperator() == nullptr || arguments == nullptr)
   3246         return;
   3247 
   3248     const TOperator op  = node->getAsOperator()->getOp();
   3249     TIntermAggregate* argAggregate = arguments->getAsAggregate();
   3250 
   3251     // Buffer is the object upon which method is called, so always arg 0
   3252     TIntermTyped* bufferObj = nullptr;
   3253 
   3254     // The parameters can be an aggregate, or just a the object as a symbol if there are no fn params.
   3255     if (argAggregate) {
   3256         if (argAggregate->getSequence().empty())
   3257             return;
   3258         bufferObj = argAggregate->getSequence()[0]->getAsTyped();
   3259     } else {
   3260         bufferObj = arguments->getAsSymbolNode();
   3261     }
   3262 
   3263     if (bufferObj == nullptr || bufferObj->getAsSymbolNode() == nullptr)
   3264         return;
   3265 
   3266     // Some methods require a hidden internal counter, obtained via getStructBufferCounter().
   3267     // This lambda adds something to it and returns the old value.
   3268     const auto incDecCounter = [&](int incval) -> TIntermTyped* {
   3269         TIntermTyped* incrementValue = intermediate.addConstantUnion(static_cast<unsigned int>(incval), loc, true);
   3270         TIntermTyped* counter = getStructBufferCounter(loc, bufferObj); // obtain the counter member
   3271 
   3272         if (counter == nullptr)
   3273             return nullptr;
   3274 
   3275         TIntermAggregate* counterIncrement = new TIntermAggregate(EOpAtomicAdd);
   3276         counterIncrement->setType(TType(EbtUint, EvqTemporary));
   3277         counterIncrement->setLoc(loc);
   3278         counterIncrement->getSequence().push_back(counter);
   3279         counterIncrement->getSequence().push_back(incrementValue);
   3280 
   3281         return counterIncrement;
   3282     };
   3283 
   3284     // Index to obtain the runtime sized array out of the buffer.
   3285     TIntermTyped* argArray = indexStructBufferContent(loc, bufferObj);
   3286     if (argArray == nullptr)
   3287         return;  // It might not be a struct buffer method.
   3288 
   3289     switch (op) {
   3290     case EOpMethodLoad:
   3291         {
   3292             TIntermTyped* argIndex = makeIntegerIndex(argAggregate->getSequence()[1]->getAsTyped());  // index
   3293 
   3294             const TType& bufferType = bufferObj->getType();
   3295 
   3296             const TBuiltInVariable builtInType = bufferType.getQualifier().declaredBuiltIn;
   3297 
   3298             // Byte address buffers index in bytes (only multiples of 4 permitted... not so much a byte address
   3299             // buffer then, but that's what it calls itself.
   3300             const bool isByteAddressBuffer = (builtInType == EbvByteAddressBuffer   ||
   3301                                               builtInType == EbvRWByteAddressBuffer);
   3302 
   3303 
   3304             if (isByteAddressBuffer)
   3305                 argIndex = intermediate.addBinaryNode(EOpRightShift, argIndex,
   3306                                                       intermediate.addConstantUnion(2, loc, true),
   3307                                                       loc, TType(EbtInt));
   3308 
   3309             // Index into the array to find the item being loaded.
   3310             const TOperator idxOp = (argIndex->getQualifier().storage == EvqConst) ? EOpIndexDirect : EOpIndexIndirect;
   3311 
   3312             node = intermediate.addIndex(idxOp, argArray, argIndex, loc);
   3313 
   3314             const TType derefType(argArray->getType(), 0);
   3315             node->setType(derefType);
   3316         }
   3317 
   3318         break;
   3319 
   3320     case EOpMethodLoad2:
   3321     case EOpMethodLoad3:
   3322     case EOpMethodLoad4:
   3323         {
   3324             TIntermTyped* argIndex = makeIntegerIndex(argAggregate->getSequence()[1]->getAsTyped());  // index
   3325 
   3326             TOperator constructOp = EOpNull;
   3327             int size = 0;
   3328 
   3329             switch (op) {
   3330             case EOpMethodLoad2: size = 2; constructOp = EOpConstructVec2; break;
   3331             case EOpMethodLoad3: size = 3; constructOp = EOpConstructVec3; break;
   3332             case EOpMethodLoad4: size = 4; constructOp = EOpConstructVec4; break;
   3333             default: assert(0);
   3334             }
   3335 
   3336             TIntermTyped* body = nullptr;
   3337 
   3338             // First, we'll store the address in a variable to avoid multiple shifts
   3339             // (we must convert the byte address to an item address)
   3340             TIntermTyped* byteAddrIdx = intermediate.addBinaryNode(EOpRightShift, argIndex,
   3341                                                                    intermediate.addConstantUnion(2, loc, true),
   3342                                                                    loc, TType(EbtInt));
   3343 
   3344             TVariable* byteAddrSym = makeInternalVariable("byteAddrTemp", TType(EbtInt, EvqTemporary));
   3345             TIntermTyped* byteAddrIdxVar = intermediate.addSymbol(*byteAddrSym, loc);
   3346 
   3347             body = intermediate.growAggregate(body, intermediate.addAssign(EOpAssign, byteAddrIdxVar, byteAddrIdx, loc));
   3348 
   3349             TIntermTyped* vec = nullptr;
   3350 
   3351             // These are only valid on (rw)byteaddressbuffers, so we can always perform the >>2
   3352             // address conversion.
   3353             for (int idx=0; idx<size; ++idx) {
   3354                 TIntermTyped* offsetIdx = byteAddrIdxVar;
   3355 
   3356                 // add index offset
   3357                 if (idx != 0)
   3358                     offsetIdx = intermediate.addBinaryNode(EOpAdd, offsetIdx,
   3359                                                            intermediate.addConstantUnion(idx, loc, true),
   3360                                                            loc, TType(EbtInt));
   3361 
   3362                 const TOperator idxOp = (offsetIdx->getQualifier().storage == EvqConst) ? EOpIndexDirect
   3363                                                                                         : EOpIndexIndirect;
   3364 
   3365                 TIntermTyped* indexVal = intermediate.addIndex(idxOp, argArray, offsetIdx, loc);
   3366 
   3367                 TType derefType(argArray->getType(), 0);
   3368                 derefType.getQualifier().makeTemporary();
   3369                 indexVal->setType(derefType);
   3370 
   3371                 vec = intermediate.growAggregate(vec, indexVal);
   3372             }
   3373 
   3374             vec->setType(TType(argArray->getBasicType(), EvqTemporary, size));
   3375             vec->getAsAggregate()->setOperator(constructOp);
   3376 
   3377             body = intermediate.growAggregate(body, vec);
   3378             body->setType(vec->getType());
   3379             body->getAsAggregate()->setOperator(EOpSequence);
   3380 
   3381             node = body;
   3382         }
   3383 
   3384         break;
   3385 
   3386     case EOpMethodStore:
   3387     case EOpMethodStore2:
   3388     case EOpMethodStore3:
   3389     case EOpMethodStore4:
   3390         {
   3391             TIntermTyped* argIndex = makeIntegerIndex(argAggregate->getSequence()[1]->getAsTyped());  // index
   3392             TIntermTyped* argValue = argAggregate->getSequence()[2]->getAsTyped();  // value
   3393 
   3394             // Index into the array to find the item being loaded.
   3395             // Byte address buffers index in bytes (only multiples of 4 permitted... not so much a byte address
   3396             // buffer then, but that's what it calls itself).
   3397 
   3398             int size = 0;
   3399 
   3400             switch (op) {
   3401             case EOpMethodStore:  size = 1; break;
   3402             case EOpMethodStore2: size = 2; break;
   3403             case EOpMethodStore3: size = 3; break;
   3404             case EOpMethodStore4: size = 4; break;
   3405             default: assert(0);
   3406             }
   3407 
   3408             TIntermAggregate* body = nullptr;
   3409 
   3410             // First, we'll store the address in a variable to avoid multiple shifts
   3411             // (we must convert the byte address to an item address)
   3412             TIntermTyped* byteAddrIdx = intermediate.addBinaryNode(EOpRightShift, argIndex,
   3413                                                                    intermediate.addConstantUnion(2, loc, true), loc, TType(EbtInt));
   3414 
   3415             TVariable* byteAddrSym = makeInternalVariable("byteAddrTemp", TType(EbtInt, EvqTemporary));
   3416             TIntermTyped* byteAddrIdxVar = intermediate.addSymbol(*byteAddrSym, loc);
   3417 
   3418             body = intermediate.growAggregate(body, intermediate.addAssign(EOpAssign, byteAddrIdxVar, byteAddrIdx, loc));
   3419 
   3420             for (int idx=0; idx<size; ++idx) {
   3421                 TIntermTyped* offsetIdx = byteAddrIdxVar;
   3422                 TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
   3423 
   3424                 // add index offset
   3425                 if (idx != 0)
   3426                     offsetIdx = intermediate.addBinaryNode(EOpAdd, offsetIdx, idxConst, loc, TType(EbtInt));
   3427 
   3428                 const TOperator idxOp = (offsetIdx->getQualifier().storage == EvqConst) ? EOpIndexDirect
   3429                                                                                         : EOpIndexIndirect;
   3430 
   3431                 TIntermTyped* lValue = intermediate.addIndex(idxOp, argArray, offsetIdx, loc);
   3432                 const TType derefType(argArray->getType(), 0);
   3433                 lValue->setType(derefType);
   3434 
   3435                 TIntermTyped* rValue;
   3436                 if (size == 1) {
   3437                     rValue = argValue;
   3438                 } else {
   3439                     rValue = intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc);
   3440                     const TType indexType(argValue->getType(), 0);
   3441                     rValue->setType(indexType);
   3442                 }
   3443 
   3444                 TIntermTyped* assign = intermediate.addAssign(EOpAssign, lValue, rValue, loc);
   3445 
   3446                 body = intermediate.growAggregate(body, assign);
   3447             }
   3448 
   3449             body->setOperator(EOpSequence);
   3450             node = body;
   3451         }
   3452 
   3453         break;
   3454 
   3455     case EOpMethodGetDimensions:
   3456         {
   3457             const int numArgs = (int)argAggregate->getSequence().size();
   3458             TIntermTyped* argNumItems = argAggregate->getSequence()[1]->getAsTyped();  // out num items
   3459             TIntermTyped* argStride   = numArgs > 2 ? argAggregate->getSequence()[2]->getAsTyped() : nullptr;  // out stride
   3460 
   3461             TIntermAggregate* body = nullptr;
   3462 
   3463             // Length output:
   3464             if (argArray->getType().isSizedArray()) {
   3465                 const int length = argArray->getType().getOuterArraySize();
   3466                 TIntermTyped* assign = intermediate.addAssign(EOpAssign, argNumItems,
   3467                                                               intermediate.addConstantUnion(length, loc, true), loc);
   3468                 body = intermediate.growAggregate(body, assign, loc);
   3469             } else {
   3470                 TIntermTyped* lengthCall = intermediate.addBuiltInFunctionCall(loc, EOpArrayLength, true, argArray,
   3471                                                                                argNumItems->getType());
   3472                 TIntermTyped* assign = intermediate.addAssign(EOpAssign, argNumItems, lengthCall, loc);
   3473                 body = intermediate.growAggregate(body, assign, loc);
   3474             }
   3475 
   3476             // Stride output:
   3477             if (argStride != nullptr) {
   3478                 int size;
   3479                 int stride;
   3480                 intermediate.getMemberAlignment(argArray->getType(), size, stride, argArray->getType().getQualifier().layoutPacking,
   3481                                                 argArray->getType().getQualifier().layoutMatrix == ElmRowMajor);
   3482 
   3483                 TIntermTyped* assign = intermediate.addAssign(EOpAssign, argStride,
   3484                                                               intermediate.addConstantUnion(stride, loc, true), loc);
   3485 
   3486                 body = intermediate.growAggregate(body, assign);
   3487             }
   3488 
   3489             body->setOperator(EOpSequence);
   3490             node = body;
   3491         }
   3492 
   3493         break;
   3494 
   3495     case EOpInterlockedAdd:
   3496     case EOpInterlockedAnd:
   3497     case EOpInterlockedExchange:
   3498     case EOpInterlockedMax:
   3499     case EOpInterlockedMin:
   3500     case EOpInterlockedOr:
   3501     case EOpInterlockedXor:
   3502     case EOpInterlockedCompareExchange:
   3503     case EOpInterlockedCompareStore:
   3504         {
   3505             // We'll replace the first argument with the block dereference, and let
   3506             // downstream decomposition handle the rest.
   3507 
   3508             TIntermSequence& sequence = argAggregate->getSequence();
   3509 
   3510             TIntermTyped* argIndex     = makeIntegerIndex(sequence[1]->getAsTyped());  // index
   3511             argIndex = intermediate.addBinaryNode(EOpRightShift, argIndex, intermediate.addConstantUnion(2, loc, true),
   3512                                                   loc, TType(EbtInt));
   3513 
   3514             const TOperator idxOp = (argIndex->getQualifier().storage == EvqConst) ? EOpIndexDirect : EOpIndexIndirect;
   3515             TIntermTyped* element = intermediate.addIndex(idxOp, argArray, argIndex, loc);
   3516 
   3517             const TType derefType(argArray->getType(), 0);
   3518             element->setType(derefType);
   3519 
   3520             // Replace the numeric byte offset parameter with array reference.
   3521             sequence[1] = element;
   3522             sequence.erase(sequence.begin(), sequence.begin()+1);
   3523         }
   3524         break;
   3525 
   3526     case EOpMethodIncrementCounter:
   3527         {
   3528             node = incDecCounter(1);
   3529             break;
   3530         }
   3531 
   3532     case EOpMethodDecrementCounter:
   3533         {
   3534             TIntermTyped* preIncValue = incDecCounter(-1); // result is original value
   3535             node = intermediate.addBinaryNode(EOpAdd, preIncValue, intermediate.addConstantUnion(-1, loc, true), loc,
   3536                                               preIncValue->getType());
   3537             break;
   3538         }
   3539 
   3540     case EOpMethodAppend:
   3541         {
   3542             TIntermTyped* oldCounter = incDecCounter(1);
   3543 
   3544             TIntermTyped* lValue = intermediate.addIndex(EOpIndexIndirect, argArray, oldCounter, loc);
   3545             TIntermTyped* rValue = argAggregate->getSequence()[1]->getAsTyped();
   3546 
   3547             const TType derefType(argArray->getType(), 0);
   3548             lValue->setType(derefType);
   3549 
   3550             node = intermediate.addAssign(EOpAssign, lValue, rValue, loc);
   3551 
   3552             break;
   3553         }
   3554 
   3555     case EOpMethodConsume:
   3556         {
   3557             TIntermTyped* oldCounter = incDecCounter(-1);
   3558 
   3559             TIntermTyped* newCounter = intermediate.addBinaryNode(EOpAdd, oldCounter,
   3560                                                                   intermediate.addConstantUnion(-1, loc, true), loc,
   3561                                                                   oldCounter->getType());
   3562 
   3563             node = intermediate.addIndex(EOpIndexIndirect, argArray, newCounter, loc);
   3564 
   3565             const TType derefType(argArray->getType(), 0);
   3566             node->setType(derefType);
   3567 
   3568             break;
   3569         }
   3570 
   3571     default:
   3572         break; // most pass through unchanged
   3573     }
   3574 }
   3575 
   3576 // Create array of standard sample positions for given sample count.
   3577 // TODO: remove when a real method to query sample pos exists in SPIR-V.
   3578 TIntermConstantUnion* HlslParseContext::getSamplePosArray(int count)
   3579 {
   3580     struct tSamplePos { float x, y; };
   3581 
   3582     static const tSamplePos pos1[] = {
   3583         { 0.0/16.0,  0.0/16.0 },
   3584     };
   3585 
   3586     // standard sample positions for 2, 4, 8, and 16 samples.
   3587     static const tSamplePos pos2[] = {
   3588         { 4.0/16.0,  4.0/16.0 }, {-4.0/16.0, -4.0/16.0 },
   3589     };
   3590 
   3591     static const tSamplePos pos4[] = {
   3592         {-2.0/16.0, -6.0/16.0 }, { 6.0/16.0, -2.0/16.0 }, {-6.0/16.0,  2.0/16.0 }, { 2.0/16.0,  6.0/16.0 },
   3593     };
   3594 
   3595     static const tSamplePos pos8[] = {
   3596         { 1.0/16.0, -3.0/16.0 }, {-1.0/16.0,  3.0/16.0 }, { 5.0/16.0,  1.0/16.0 }, {-3.0/16.0, -5.0/16.0 },
   3597         {-5.0/16.0,  5.0/16.0 }, {-7.0/16.0, -1.0/16.0 }, { 3.0/16.0,  7.0/16.0 }, { 7.0/16.0, -7.0/16.0 },
   3598     };
   3599 
   3600     static const tSamplePos pos16[] = {
   3601         { 1.0/16.0,  1.0/16.0 }, {-1.0/16.0, -3.0/16.0 }, {-3.0/16.0,  2.0/16.0 }, { 4.0/16.0, -1.0/16.0 },
   3602         {-5.0/16.0, -2.0/16.0 }, { 2.0/16.0,  5.0/16.0 }, { 5.0/16.0,  3.0/16.0 }, { 3.0/16.0, -5.0/16.0 },
   3603         {-2.0/16.0,  6.0/16.0 }, { 0.0/16.0, -7.0/16.0 }, {-4.0/16.0, -6.0/16.0 }, {-6.0/16.0,  4.0/16.0 },
   3604         {-8.0/16.0,  0.0/16.0 }, { 7.0/16.0, -4.0/16.0 }, { 6.0/16.0,  7.0/16.0 }, {-7.0/16.0, -8.0/16.0 },
   3605     };
   3606 
   3607     const tSamplePos* sampleLoc = nullptr;
   3608     int numSamples = count;
   3609 
   3610     switch (count) {
   3611     case 2:  sampleLoc = pos2;  break;
   3612     case 4:  sampleLoc = pos4;  break;
   3613     case 8:  sampleLoc = pos8;  break;
   3614     case 16: sampleLoc = pos16; break;
   3615     default:
   3616         sampleLoc = pos1;
   3617         numSamples = 1;
   3618     }
   3619 
   3620     TConstUnionArray* values = new TConstUnionArray(numSamples*2);
   3621 
   3622     for (int pos=0; pos<count; ++pos) {
   3623         TConstUnion x, y;
   3624         x.setDConst(sampleLoc[pos].x);
   3625         y.setDConst(sampleLoc[pos].y);
   3626 
   3627         (*values)[pos*2+0] = x;
   3628         (*values)[pos*2+1] = y;
   3629     }
   3630 
   3631     TType retType(EbtFloat, EvqConst, 2);
   3632 
   3633     if (numSamples != 1) {
   3634         TArraySizes* arraySizes = new TArraySizes;
   3635         arraySizes->addInnerSize(numSamples);
   3636         retType.transferArraySizes(arraySizes);
   3637     }
   3638 
   3639     return new TIntermConstantUnion(*values, retType);
   3640 }
   3641 
   3642 //
   3643 // Decompose DX9 and DX10 sample intrinsics & object methods into AST
   3644 //
   3645 void HlslParseContext::decomposeSampleMethods(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
   3646 {
   3647     if (node == nullptr || !node->getAsOperator())
   3648         return;
   3649 
   3650     // Sampler return must always be a vec4, but we can construct a shorter vector or a structure from it.
   3651     const auto convertReturn = [&loc, &node, this](TIntermTyped* result, const TSampler& sampler) -> TIntermTyped* {
   3652         result->setType(TType(node->getType().getBasicType(), EvqTemporary, node->getVectorSize()));
   3653 
   3654         TIntermTyped* convertedResult = nullptr;
   3655 
   3656         TType retType;
   3657         getTextureReturnType(sampler, retType);
   3658 
   3659         if (retType.isStruct()) {
   3660             // For type convenience, conversionAggregate points to the convertedResult (we know it's an aggregate here)
   3661             TIntermAggregate* conversionAggregate = new TIntermAggregate;
   3662             convertedResult = conversionAggregate;
   3663 
   3664             // Convert vector output to return structure.  We will need a temp symbol to copy the results to.
   3665             TVariable* structVar = makeInternalVariable("@sampleStructTemp", retType);
   3666 
   3667             // We also need a temp symbol to hold the result of the texture.  We don't want to re-fetch the
   3668             // sample each time we'll index into the result, so we'll copy to this, and index into the copy.
   3669             TVariable* sampleShadow = makeInternalVariable("@sampleResultShadow", result->getType());
   3670 
   3671             // Initial copy from texture to our sample result shadow.
   3672             TIntermTyped* shadowCopy = intermediate.addAssign(EOpAssign, intermediate.addSymbol(*sampleShadow, loc),
   3673                                                               result, loc);
   3674 
   3675             conversionAggregate->getSequence().push_back(shadowCopy);
   3676 
   3677             unsigned vec4Pos = 0;
   3678 
   3679             for (unsigned m = 0; m < unsigned(retType.getStruct()->size()); ++m) {
   3680                 const TType memberType(retType, m); // dereferenced type of the member we're about to assign.
   3681 
   3682                 // Check for bad struct members.  This should have been caught upstream.  Complain, because
   3683                 // we don't know what to do with it.  This algorithm could be generalized to handle
   3684                 // other things, e.g., sub-structures, but HLSL doesn't allow them.
   3685                 if (!memberType.isVector() && !memberType.isScalar()) {
   3686                     error(loc, "expected: scalar or vector type in texture structure", "", "");
   3687                     return nullptr;
   3688                 }
   3689 
   3690                 // Index into the struct variable to find the member to assign.
   3691                 TIntermTyped* structMember = intermediate.addIndex(EOpIndexDirectStruct,
   3692                                                                    intermediate.addSymbol(*structVar, loc),
   3693                                                                    intermediate.addConstantUnion(m, loc), loc);
   3694 
   3695                 structMember->setType(memberType);
   3696 
   3697                 // Assign each component of (possible) vector in struct member.
   3698                 for (int component = 0; component < memberType.getVectorSize(); ++component) {
   3699                     TIntermTyped* vec4Member = intermediate.addIndex(EOpIndexDirect,
   3700                                                                      intermediate.addSymbol(*sampleShadow, loc),
   3701                                                                      intermediate.addConstantUnion(vec4Pos++, loc), loc);
   3702                     vec4Member->setType(TType(memberType.getBasicType(), EvqTemporary, 1));
   3703 
   3704                     TIntermTyped* memberAssign = nullptr;
   3705 
   3706                     if (memberType.isVector()) {
   3707                         // Vector member: we need to create an access chain to the vector component.
   3708 
   3709                         TIntermTyped* structVecComponent = intermediate.addIndex(EOpIndexDirect, structMember,
   3710                                                                                  intermediate.addConstantUnion(component, loc), loc);
   3711 
   3712                         memberAssign = intermediate.addAssign(EOpAssign, structVecComponent, vec4Member, loc);
   3713                     } else {
   3714                         // Scalar member: we can assign to it directly.
   3715                         memberAssign = intermediate.addAssign(EOpAssign, structMember, vec4Member, loc);
   3716                     }
   3717 
   3718 
   3719                     conversionAggregate->getSequence().push_back(memberAssign);
   3720                 }
   3721             }
   3722 
   3723             // Add completed variable so the expression results in the whole struct value we just built.
   3724             conversionAggregate->getSequence().push_back(intermediate.addSymbol(*structVar, loc));
   3725 
   3726             // Make it a sequence.
   3727             intermediate.setAggregateOperator(conversionAggregate, EOpSequence, retType, loc);
   3728         } else {
   3729             // vector clamp the output if template vector type is smaller than sample result.
   3730             if (retType.getVectorSize() < node->getVectorSize()) {
   3731                 // Too many components.  Construct shorter vector from it.
   3732                 const TOperator op = intermediate.mapTypeToConstructorOp(retType);
   3733 
   3734                 convertedResult = constructBuiltIn(retType, op, result, loc, false);
   3735             } else {
   3736                 // Enough components.  Use directly.
   3737                 convertedResult = result;
   3738             }
   3739         }
   3740 
   3741         convertedResult->setLoc(loc);
   3742         return convertedResult;
   3743     };
   3744 
   3745     const TOperator op  = node->getAsOperator()->getOp();
   3746     const TIntermAggregate* argAggregate = arguments ? arguments->getAsAggregate() : nullptr;
   3747 
   3748     // Bail out if not a sampler method.
   3749     // Note though this is odd to do before checking the op, because the op
   3750     // could be something that takes the arguments, and the function in question
   3751     // takes the result of the op.  So, this is not the final word.
   3752     if (arguments != nullptr) {
   3753         if (argAggregate == nullptr) {
   3754             if (arguments->getAsTyped()->getBasicType() != EbtSampler)
   3755                 return;
   3756         } else {
   3757             if (argAggregate->getSequence().size() == 0 ||
   3758                 argAggregate->getSequence()[0]->getAsTyped()->getBasicType() != EbtSampler)
   3759                 return;
   3760         }
   3761     }
   3762 
   3763     switch (op) {
   3764     // **** DX9 intrinsics: ****
   3765     case EOpTexture:
   3766         {
   3767             // Texture with ddx & ddy is really gradient form in HLSL
   3768             if (argAggregate->getSequence().size() == 4)
   3769                 node->getAsAggregate()->setOperator(EOpTextureGrad);
   3770 
   3771             break;
   3772         }
   3773     case EOpTextureLod: //is almost EOpTextureBias (only args & operations are different)
   3774         {
   3775             TIntermTyped *argSamp = argAggregate->getSequence()[0]->getAsTyped();   // sampler
   3776             TIntermTyped *argCoord = argAggregate->getSequence()[1]->getAsTyped();  // coord
   3777 
   3778             assert(argCoord->getVectorSize() == 4);
   3779             TIntermTyped *w = intermediate.addConstantUnion(3, loc, true);
   3780             TIntermTyped *argLod = intermediate.addIndex(EOpIndexDirect, argCoord, w, loc);
   3781 
   3782             TOperator constructOp = EOpNull;
   3783             const TSampler &sampler = argSamp->getType().getSampler();
   3784             int coordSize = 0;
   3785 
   3786             switch (sampler.dim)
   3787             {
   3788             case Esd1D:   constructOp = EOpConstructFloat; coordSize = 1; break; // 1D
   3789             case Esd2D:   constructOp = EOpConstructVec2;  coordSize = 2; break; // 2D
   3790             case Esd3D:   constructOp = EOpConstructVec3;  coordSize = 3; break; // 3D
   3791             case EsdCube: constructOp = EOpConstructVec3;  coordSize = 3; break; // also 3D
   3792             default:
   3793                 break;
   3794             }
   3795 
   3796             TIntermAggregate *constructCoord = new TIntermAggregate(constructOp);
   3797             constructCoord->getSequence().push_back(argCoord);
   3798             constructCoord->setLoc(loc);
   3799             constructCoord->setType(TType(argCoord->getBasicType(), EvqTemporary, coordSize));
   3800 
   3801             TIntermAggregate *tex = new TIntermAggregate(EOpTextureLod);
   3802             tex->getSequence().push_back(argSamp);        // sampler
   3803             tex->getSequence().push_back(constructCoord); // coordinate
   3804             tex->getSequence().push_back(argLod);         // lod
   3805 
   3806             node = convertReturn(tex, sampler);
   3807 
   3808             break;
   3809         }
   3810 
   3811     case EOpTextureBias:
   3812         {
   3813             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // sampler
   3814             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // coord
   3815 
   3816             // HLSL puts bias in W component of coordinate.  We extract it and add it to
   3817             // the argument list, instead
   3818             TIntermTyped* w = intermediate.addConstantUnion(3, loc, true);
   3819             TIntermTyped* bias = intermediate.addIndex(EOpIndexDirect, arg1, w, loc);
   3820 
   3821             TOperator constructOp = EOpNull;
   3822             const TSampler& sampler = arg0->getType().getSampler();
   3823 
   3824             switch (sampler.dim) {
   3825             case Esd1D:   constructOp = EOpConstructFloat; break; // 1D
   3826             case Esd2D:   constructOp = EOpConstructVec2;  break; // 2D
   3827             case Esd3D:   constructOp = EOpConstructVec3;  break; // 3D
   3828             case EsdCube: constructOp = EOpConstructVec3;  break; // also 3D
   3829             default: break;
   3830             }
   3831 
   3832             TIntermAggregate* constructCoord = new TIntermAggregate(constructOp);
   3833             constructCoord->getSequence().push_back(arg1);
   3834             constructCoord->setLoc(loc);
   3835 
   3836             // The input vector should never be less than 2, since there's always a bias.
   3837             // The max is for safety, and should be a no-op.
   3838             constructCoord->setType(TType(arg1->getBasicType(), EvqTemporary, std::max(arg1->getVectorSize() - 1, 0)));
   3839 
   3840             TIntermAggregate* tex = new TIntermAggregate(EOpTexture);
   3841             tex->getSequence().push_back(arg0);           // sampler
   3842             tex->getSequence().push_back(constructCoord); // coordinate
   3843             tex->getSequence().push_back(bias);           // bias
   3844 
   3845             node = convertReturn(tex, sampler);
   3846 
   3847             break;
   3848         }
   3849 
   3850     // **** DX10 methods: ****
   3851     case EOpMethodSample:     // fall through
   3852     case EOpMethodSampleBias: // ...
   3853         {
   3854             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   3855             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   3856             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   3857             TIntermTyped* argBias   = nullptr;
   3858             TIntermTyped* argOffset = nullptr;
   3859             const TSampler& sampler = argTex->getType().getSampler();
   3860 
   3861             int nextArg = 3;
   3862 
   3863             if (op == EOpMethodSampleBias)  // SampleBias has a bias arg
   3864                 argBias = argAggregate->getSequence()[nextArg++]->getAsTyped();
   3865 
   3866             TOperator textureOp = EOpTexture;
   3867 
   3868             if ((int)argAggregate->getSequence().size() == (nextArg+1)) { // last parameter is offset form
   3869                 textureOp = EOpTextureOffset;
   3870                 argOffset = argAggregate->getSequence()[nextArg++]->getAsTyped();
   3871             }
   3872 
   3873             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   3874 
   3875             TIntermAggregate* txsample = new TIntermAggregate(textureOp);
   3876             txsample->getSequence().push_back(txcombine);
   3877             txsample->getSequence().push_back(argCoord);
   3878 
   3879             if (argBias != nullptr)
   3880                 txsample->getSequence().push_back(argBias);
   3881 
   3882             if (argOffset != nullptr)
   3883                 txsample->getSequence().push_back(argOffset);
   3884 
   3885             node = convertReturn(txsample, sampler);
   3886 
   3887             break;
   3888         }
   3889 
   3890     case EOpMethodSampleGrad: // ...
   3891         {
   3892             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   3893             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   3894             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   3895             TIntermTyped* argDDX    = argAggregate->getSequence()[3]->getAsTyped();
   3896             TIntermTyped* argDDY    = argAggregate->getSequence()[4]->getAsTyped();
   3897             TIntermTyped* argOffset = nullptr;
   3898             const TSampler& sampler = argTex->getType().getSampler();
   3899 
   3900             TOperator textureOp = EOpTextureGrad;
   3901 
   3902             if (argAggregate->getSequence().size() == 6) { // last parameter is offset form
   3903                 textureOp = EOpTextureGradOffset;
   3904                 argOffset = argAggregate->getSequence()[5]->getAsTyped();
   3905             }
   3906 
   3907             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   3908 
   3909             TIntermAggregate* txsample = new TIntermAggregate(textureOp);
   3910             txsample->getSequence().push_back(txcombine);
   3911             txsample->getSequence().push_back(argCoord);
   3912             txsample->getSequence().push_back(argDDX);
   3913             txsample->getSequence().push_back(argDDY);
   3914 
   3915             if (argOffset != nullptr)
   3916                 txsample->getSequence().push_back(argOffset);
   3917 
   3918             node = convertReturn(txsample, sampler);
   3919 
   3920             break;
   3921         }
   3922 
   3923     case EOpMethodGetDimensions:
   3924         {
   3925             // AST returns a vector of results, which we break apart component-wise into
   3926             // separate values to assign to the HLSL method's outputs, ala:
   3927             //  tx . GetDimensions(width, height);
   3928             //      float2 sizeQueryTemp = EOpTextureQuerySize
   3929             //      width = sizeQueryTemp.X;
   3930             //      height = sizeQueryTemp.Y;
   3931 
   3932             TIntermTyped* argTex = argAggregate->getSequence()[0]->getAsTyped();
   3933             const TType& texType = argTex->getType();
   3934 
   3935             assert(texType.getBasicType() == EbtSampler);
   3936 
   3937             const TSampler& sampler = texType.getSampler();
   3938             const TSamplerDim dim = sampler.dim;
   3939             const bool isImage = sampler.isImage();
   3940             const bool isMs = sampler.isMultiSample();
   3941             const int numArgs = (int)argAggregate->getSequence().size();
   3942 
   3943             int numDims = 0;
   3944 
   3945             switch (dim) {
   3946             case Esd1D:     numDims = 1; break; // W
   3947             case Esd2D:     numDims = 2; break; // W, H
   3948             case Esd3D:     numDims = 3; break; // W, H, D
   3949             case EsdCube:   numDims = 2; break; // W, H (cube)
   3950             case EsdBuffer: numDims = 1; break; // W (buffers)
   3951             case EsdRect:   numDims = 2; break; // W, H (rect)
   3952             default:
   3953                 assert(0 && "unhandled texture dimension");
   3954             }
   3955 
   3956             // Arrayed adds another dimension for the number of array elements
   3957             if (sampler.isArrayed())
   3958                 ++numDims;
   3959 
   3960             // Establish whether the method itself is querying mip levels.  This can be false even
   3961             // if the underlying query requires a MIP level, due to the available HLSL method overloads.
   3962             const bool mipQuery = (numArgs > (numDims + 1 + (isMs ? 1 : 0)));
   3963 
   3964             // Establish whether we must use the LOD form of query (even if the method did not supply a mip level to query).
   3965             // True if:
   3966             //   1. 1D/2D/3D/Cube AND multisample==0 AND NOT image (those can be sent to the non-LOD query)
   3967             // or,
   3968             //   2. There is a LOD (because the non-LOD query cannot be used in that case, per spec)
   3969             const bool mipRequired =
   3970                 ((dim == Esd1D || dim == Esd2D || dim == Esd3D || dim == EsdCube) && !isMs && !isImage) || // 1...
   3971                 mipQuery; // 2...
   3972 
   3973             // AST assumes integer return.  Will be converted to float if required.
   3974             TIntermAggregate* sizeQuery = new TIntermAggregate(isImage ? EOpImageQuerySize : EOpTextureQuerySize);
   3975             sizeQuery->getSequence().push_back(argTex);
   3976 
   3977             // If we're building an LOD query, add the LOD.
   3978             if (mipRequired) {
   3979                 // If the base HLSL query had no MIP level given, use level 0.
   3980                 TIntermTyped* queryLod = mipQuery ? argAggregate->getSequence()[1]->getAsTyped() :
   3981                     intermediate.addConstantUnion(0, loc, true);
   3982                 sizeQuery->getSequence().push_back(queryLod);
   3983             }
   3984 
   3985             sizeQuery->setType(TType(EbtUint, EvqTemporary, numDims));
   3986             sizeQuery->setLoc(loc);
   3987 
   3988             // Return value from size query
   3989             TVariable* tempArg = makeInternalVariable("sizeQueryTemp", sizeQuery->getType());
   3990             tempArg->getWritableType().getQualifier().makeTemporary();
   3991             TIntermTyped* sizeQueryAssign = intermediate.addAssign(EOpAssign,
   3992                                                                    intermediate.addSymbol(*tempArg, loc),
   3993                                                                    sizeQuery, loc);
   3994 
   3995             // Compound statement for assigning outputs
   3996             TIntermAggregate* compoundStatement = intermediate.makeAggregate(sizeQueryAssign, loc);
   3997             // Index of first output parameter
   3998             const int outParamBase = mipQuery ? 2 : 1;
   3999 
   4000             for (int compNum = 0; compNum < numDims; ++compNum) {
   4001                 TIntermTyped* indexedOut = nullptr;
   4002                 TIntermSymbol* sizeQueryReturn = intermediate.addSymbol(*tempArg, loc);
   4003 
   4004                 if (numDims > 1) {
   4005                     TIntermTyped* component = intermediate.addConstantUnion(compNum, loc, true);
   4006                     indexedOut = intermediate.addIndex(EOpIndexDirect, sizeQueryReturn, component, loc);
   4007                     indexedOut->setType(TType(EbtUint, EvqTemporary, 1));
   4008                     indexedOut->setLoc(loc);
   4009                 } else {
   4010                     indexedOut = sizeQueryReturn;
   4011                 }
   4012 
   4013                 TIntermTyped* outParam = argAggregate->getSequence()[outParamBase + compNum]->getAsTyped();
   4014                 TIntermTyped* compAssign = intermediate.addAssign(EOpAssign, outParam, indexedOut, loc);
   4015 
   4016                 compoundStatement = intermediate.growAggregate(compoundStatement, compAssign);
   4017             }
   4018 
   4019             // handle mip level parameter
   4020             if (mipQuery) {
   4021                 TIntermTyped* outParam = argAggregate->getSequence()[outParamBase + numDims]->getAsTyped();
   4022 
   4023                 TIntermAggregate* levelsQuery = new TIntermAggregate(EOpTextureQueryLevels);
   4024                 levelsQuery->getSequence().push_back(argTex);
   4025                 levelsQuery->setType(TType(EbtUint, EvqTemporary, 1));
   4026                 levelsQuery->setLoc(loc);
   4027 
   4028                 TIntermTyped* compAssign = intermediate.addAssign(EOpAssign, outParam, levelsQuery, loc);
   4029                 compoundStatement = intermediate.growAggregate(compoundStatement, compAssign);
   4030             }
   4031 
   4032             // 2DMS formats query # samples, which needs a different query op
   4033             if (sampler.isMultiSample()) {
   4034                 TIntermTyped* outParam = argAggregate->getSequence()[outParamBase + numDims]->getAsTyped();
   4035 
   4036                 TIntermAggregate* samplesQuery = new TIntermAggregate(EOpImageQuerySamples);
   4037                 samplesQuery->getSequence().push_back(argTex);
   4038                 samplesQuery->setType(TType(EbtUint, EvqTemporary, 1));
   4039                 samplesQuery->setLoc(loc);
   4040 
   4041                 TIntermTyped* compAssign = intermediate.addAssign(EOpAssign, outParam, samplesQuery, loc);
   4042                 compoundStatement = intermediate.growAggregate(compoundStatement, compAssign);
   4043             }
   4044 
   4045             compoundStatement->setOperator(EOpSequence);
   4046             compoundStatement->setLoc(loc);
   4047             compoundStatement->setType(TType(EbtVoid));
   4048 
   4049             node = compoundStatement;
   4050 
   4051             break;
   4052         }
   4053 
   4054     case EOpMethodSampleCmp:  // fall through...
   4055     case EOpMethodSampleCmpLevelZero:
   4056         {
   4057             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   4058             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   4059             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   4060             TIntermTyped* argCmpVal = argAggregate->getSequence()[3]->getAsTyped();
   4061             TIntermTyped* argOffset = nullptr;
   4062 
   4063             // Sampler argument should be a sampler.
   4064             if (argSamp->getType().getBasicType() != EbtSampler) {
   4065                 error(loc, "expected: sampler type", "", "");
   4066                 return;
   4067             }
   4068 
   4069             // Sampler should be a SamplerComparisonState
   4070             if (! argSamp->getType().getSampler().isShadow()) {
   4071                 error(loc, "expected: SamplerComparisonState", "", "");
   4072                 return;
   4073             }
   4074 
   4075             // optional offset value
   4076             if (argAggregate->getSequence().size() > 4)
   4077                 argOffset = argAggregate->getSequence()[4]->getAsTyped();
   4078 
   4079             const int coordDimWithCmpVal = argCoord->getType().getVectorSize() + 1; // +1 for cmp
   4080 
   4081             // AST wants comparison value as one of the texture coordinates
   4082             TOperator constructOp = EOpNull;
   4083             switch (coordDimWithCmpVal) {
   4084             // 1D can't happen: there's always at least 1 coordinate dimension + 1 cmp val
   4085             case 2: constructOp = EOpConstructVec2;  break;
   4086             case 3: constructOp = EOpConstructVec3;  break;
   4087             case 4: constructOp = EOpConstructVec4;  break;
   4088             case 5: constructOp = EOpConstructVec4;  break; // cubeArrayShadow, cmp value is separate arg.
   4089             default: assert(0); break;
   4090             }
   4091 
   4092             TIntermAggregate* coordWithCmp = new TIntermAggregate(constructOp);
   4093             coordWithCmp->getSequence().push_back(argCoord);
   4094             if (coordDimWithCmpVal != 5) // cube array shadow is special.
   4095                 coordWithCmp->getSequence().push_back(argCmpVal);
   4096             coordWithCmp->setLoc(loc);
   4097             coordWithCmp->setType(TType(argCoord->getBasicType(), EvqTemporary, std::min(coordDimWithCmpVal, 4)));
   4098 
   4099             TOperator textureOp = (op == EOpMethodSampleCmpLevelZero ? EOpTextureLod : EOpTexture);
   4100             if (argOffset != nullptr)
   4101                 textureOp = (op == EOpMethodSampleCmpLevelZero ? EOpTextureLodOffset : EOpTextureOffset);
   4102 
   4103             // Create combined sampler & texture op
   4104             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   4105             TIntermAggregate* txsample = new TIntermAggregate(textureOp);
   4106             txsample->getSequence().push_back(txcombine);
   4107             txsample->getSequence().push_back(coordWithCmp);
   4108 
   4109             if (coordDimWithCmpVal == 5) // cube array shadow is special: cmp val follows coord.
   4110                 txsample->getSequence().push_back(argCmpVal);
   4111 
   4112             // the LevelZero form uses 0 as an explicit LOD
   4113             if (op == EOpMethodSampleCmpLevelZero)
   4114                 txsample->getSequence().push_back(intermediate.addConstantUnion(0.0, EbtFloat, loc, true));
   4115 
   4116             // Add offset if present
   4117             if (argOffset != nullptr)
   4118                 txsample->getSequence().push_back(argOffset);
   4119 
   4120             txsample->setType(node->getType());
   4121             txsample->setLoc(loc);
   4122             node = txsample;
   4123 
   4124             break;
   4125         }
   4126 
   4127     case EOpMethodLoad:
   4128         {
   4129             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   4130             TIntermTyped* argCoord  = argAggregate->getSequence()[1]->getAsTyped();
   4131             TIntermTyped* argOffset = nullptr;
   4132             TIntermTyped* lodComponent = nullptr;
   4133             TIntermTyped* coordSwizzle = nullptr;
   4134 
   4135             const TSampler& sampler = argTex->getType().getSampler();
   4136             const bool isMS = sampler.isMultiSample();
   4137             const bool isBuffer = sampler.dim == EsdBuffer;
   4138             const bool isImage = sampler.isImage();
   4139             const TBasicType coordBaseType = argCoord->getType().getBasicType();
   4140 
   4141             // Last component of coordinate is the mip level, except for MS, buffer, and image forms.  We separate them here:
   4142             if (isMS || isBuffer || isImage) {
   4143                 // MS, Buffer, and Image have no LOD
   4144                 coordSwizzle = argCoord;
   4145             } else {
   4146                 // Extract coordinate
   4147                 int swizzleSize = argCoord->getType().getVectorSize() - (isMS ? 0 : 1);
   4148                 TSwizzleSelectors<TVectorSelector> coordFields;
   4149                 for (int i = 0; i < swizzleSize; ++i)
   4150                     coordFields.push_back(i);
   4151                 TIntermTyped* coordIdx = intermediate.addSwizzle(coordFields, loc);
   4152                 coordSwizzle = intermediate.addIndex(EOpVectorSwizzle, argCoord, coordIdx, loc);
   4153                 coordSwizzle->setType(TType(coordBaseType, EvqTemporary, coordFields.size()));
   4154 
   4155                 // Extract LOD
   4156                 TIntermTyped* lodIdx = intermediate.addConstantUnion(coordFields.size(), loc, true);
   4157                 lodComponent = intermediate.addIndex(EOpIndexDirect, argCoord, lodIdx, loc);
   4158                 lodComponent->setType(TType(coordBaseType, EvqTemporary, 1));
   4159             }
   4160 
   4161             const int numArgs    = (int)argAggregate->getSequence().size();
   4162             const bool hasOffset = ((!isMS && numArgs == 3) || (isMS && numArgs == 4));
   4163 
   4164             // Create texel fetch
   4165             const TOperator fetchOp = (isImage   ? EOpImageLoad :
   4166                                        hasOffset ? EOpTextureFetchOffset :
   4167                                        EOpTextureFetch);
   4168             TIntermAggregate* txfetch = new TIntermAggregate(fetchOp);
   4169 
   4170             // Build up the fetch
   4171             txfetch->getSequence().push_back(argTex);
   4172             txfetch->getSequence().push_back(coordSwizzle);
   4173 
   4174             if (isMS) {
   4175                 // add 2DMS sample index
   4176                 TIntermTyped* argSampleIdx  = argAggregate->getSequence()[2]->getAsTyped();
   4177                 txfetch->getSequence().push_back(argSampleIdx);
   4178             } else if (isBuffer) {
   4179                 // Nothing else to do for buffers.
   4180             } else if (isImage) {
   4181                 // Nothing else to do for images.
   4182             } else {
   4183                 // 2DMS, buffer, and image have no LOD, but everything else does.
   4184                 txfetch->getSequence().push_back(lodComponent);
   4185             }
   4186 
   4187             // Obtain offset arg, if there is one.
   4188             if (hasOffset) {
   4189                 const int offsetPos  = (isMS ? 3 : 2);
   4190                 argOffset = argAggregate->getSequence()[offsetPos]->getAsTyped();
   4191                 txfetch->getSequence().push_back(argOffset);
   4192             }
   4193 
   4194             node = convertReturn(txfetch, sampler);
   4195 
   4196             break;
   4197         }
   4198 
   4199     case EOpMethodSampleLevel:
   4200         {
   4201             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   4202             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   4203             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   4204             TIntermTyped* argLod    = argAggregate->getSequence()[3]->getAsTyped();
   4205             TIntermTyped* argOffset = nullptr;
   4206             const TSampler& sampler = argTex->getType().getSampler();
   4207 
   4208             const int  numArgs = (int)argAggregate->getSequence().size();
   4209 
   4210             if (numArgs == 5) // offset, if present
   4211                 argOffset = argAggregate->getSequence()[4]->getAsTyped();
   4212 
   4213             const TOperator textureOp = (argOffset == nullptr ? EOpTextureLod : EOpTextureLodOffset);
   4214             TIntermAggregate* txsample = new TIntermAggregate(textureOp);
   4215 
   4216             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   4217 
   4218             txsample->getSequence().push_back(txcombine);
   4219             txsample->getSequence().push_back(argCoord);
   4220             txsample->getSequence().push_back(argLod);
   4221 
   4222             if (argOffset != nullptr)
   4223                 txsample->getSequence().push_back(argOffset);
   4224 
   4225             node = convertReturn(txsample, sampler);
   4226 
   4227             break;
   4228         }
   4229 
   4230     case EOpMethodGather:
   4231         {
   4232             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   4233             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   4234             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   4235             TIntermTyped* argOffset = nullptr;
   4236 
   4237             // Offset is optional
   4238             if (argAggregate->getSequence().size() > 3)
   4239                 argOffset = argAggregate->getSequence()[3]->getAsTyped();
   4240 
   4241             const TOperator textureOp = (argOffset == nullptr ? EOpTextureGather : EOpTextureGatherOffset);
   4242             TIntermAggregate* txgather = new TIntermAggregate(textureOp);
   4243 
   4244             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   4245 
   4246             txgather->getSequence().push_back(txcombine);
   4247             txgather->getSequence().push_back(argCoord);
   4248             // No component argument is added: the gather component, if not given, is implicitly channel 0 (red)
   4249 
   4250             if (argOffset != nullptr)
   4251                 txgather->getSequence().push_back(argOffset);
   4252 
   4253             txgather->setType(node->getType());
   4254             txgather->setLoc(loc);
   4255             node = txgather;
   4256 
   4257             break;
   4258         }
   4259 
   4260     case EOpMethodGatherRed:      // fall through...
   4261     case EOpMethodGatherGreen:    // ...
   4262     case EOpMethodGatherBlue:     // ...
   4263     case EOpMethodGatherAlpha:    // ...
   4264     case EOpMethodGatherCmpRed:   // ...
   4265     case EOpMethodGatherCmpGreen: // ...
   4266     case EOpMethodGatherCmpBlue:  // ...
   4267     case EOpMethodGatherCmpAlpha: // ...
   4268         {
   4269             int channel = 0;    // the channel we are gathering
   4270             int cmpValues = 0;  // 1 if there is a compare value (handier than a bool below)
   4271 
   4272             switch (op) {
   4273             case EOpMethodGatherCmpRed:   cmpValues = 1;  // fall through
   4274             case EOpMethodGatherRed:      channel = 0; break;
   4275             case EOpMethodGatherCmpGreen: cmpValues = 1;  // fall through
   4276             case EOpMethodGatherGreen:    channel = 1; break;
   4277             case EOpMethodGatherCmpBlue:  cmpValues = 1;  // fall through
   4278             case EOpMethodGatherBlue:     channel = 2; break;
   4279             case EOpMethodGatherCmpAlpha: cmpValues = 1;  // fall through
   4280             case EOpMethodGatherAlpha:    channel = 3; break;
   4281             default:                      assert(0);   break;
   4282             }
   4283 
   4284             // For now, we have nothing to map the component-wise comparison forms
   4285             // to, because neither GLSL nor SPIR-V has such an opcode.  Issue an
   4286             // unimplemented error instead.  Most of the machinery is here if that
   4287             // should ever become available.  However, red can be passed through
   4288             // to OpImageDrefGather.  G/B/A cannot, because that opcode does not
   4289             // accept a component.
   4290             if (cmpValues != 0 && op != EOpMethodGatherCmpRed) {
   4291                 error(loc, "unimplemented: component-level gather compare", "", "");
   4292                 return;
   4293             }
   4294 
   4295             int arg = 0;
   4296 
   4297             TIntermTyped* argTex        = argAggregate->getSequence()[arg++]->getAsTyped();
   4298             TIntermTyped* argSamp       = argAggregate->getSequence()[arg++]->getAsTyped();
   4299             TIntermTyped* argCoord      = argAggregate->getSequence()[arg++]->getAsTyped();
   4300             TIntermTyped* argOffset     = nullptr;
   4301             TIntermTyped* argOffsets[4] = { nullptr, nullptr, nullptr, nullptr };
   4302             // TIntermTyped* argStatus     = nullptr; // TODO: residency
   4303             TIntermTyped* argCmp        = nullptr;
   4304 
   4305             const TSamplerDim dim = argTex->getType().getSampler().dim;
   4306 
   4307             const int  argSize = (int)argAggregate->getSequence().size();
   4308             bool hasStatus     = (argSize == (5+cmpValues) || argSize == (8+cmpValues));
   4309             bool hasOffset1    = false;
   4310             bool hasOffset4    = false;
   4311 
   4312             // Sampler argument should be a sampler.
   4313             if (argSamp->getType().getBasicType() != EbtSampler) {
   4314                 error(loc, "expected: sampler type", "", "");
   4315                 return;
   4316             }
   4317 
   4318             // Cmp forms require SamplerComparisonState
   4319             if (cmpValues > 0 && ! argSamp->getType().getSampler().isShadow()) {
   4320                 error(loc, "expected: SamplerComparisonState", "", "");
   4321                 return;
   4322             }
   4323 
   4324             // Only 2D forms can have offsets.  Discover if we have 0, 1 or 4 offsets.
   4325             if (dim == Esd2D) {
   4326                 hasOffset1 = (argSize == (4+cmpValues) || argSize == (5+cmpValues));
   4327                 hasOffset4 = (argSize == (7+cmpValues) || argSize == (8+cmpValues));
   4328             }
   4329 
   4330             assert(!(hasOffset1 && hasOffset4));
   4331 
   4332             TOperator textureOp = EOpTextureGather;
   4333 
   4334             // Compare forms have compare value
   4335             if (cmpValues != 0)
   4336                 argCmp = argOffset = argAggregate->getSequence()[arg++]->getAsTyped();
   4337 
   4338             // Some forms have single offset
   4339             if (hasOffset1) {
   4340                 textureOp = EOpTextureGatherOffset;   // single offset form
   4341                 argOffset = argAggregate->getSequence()[arg++]->getAsTyped();
   4342             }
   4343 
   4344             // Some forms have 4 gather offsets
   4345             if (hasOffset4) {
   4346                 textureOp = EOpTextureGatherOffsets;  // note plural, for 4 offset form
   4347                 for (int offsetNum = 0; offsetNum < 4; ++offsetNum)
   4348                     argOffsets[offsetNum] = argAggregate->getSequence()[arg++]->getAsTyped();
   4349             }
   4350 
   4351             // Residency status
   4352             if (hasStatus) {
   4353                 // argStatus = argAggregate->getSequence()[arg++]->getAsTyped();
   4354                 error(loc, "unimplemented: residency status", "", "");
   4355                 return;
   4356             }
   4357 
   4358             TIntermAggregate* txgather = new TIntermAggregate(textureOp);
   4359             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   4360 
   4361             TIntermTyped* argChannel = intermediate.addConstantUnion(channel, loc, true);
   4362 
   4363             txgather->getSequence().push_back(txcombine);
   4364             txgather->getSequence().push_back(argCoord);
   4365 
   4366             // AST wants an array of 4 offsets, where HLSL has separate args.  Here
   4367             // we construct an array from the separate args.
   4368             if (hasOffset4) {
   4369                 TType arrayType(EbtInt, EvqTemporary, 2);
   4370                 TArraySizes* arraySizes = new TArraySizes;
   4371                 arraySizes->addInnerSize(4);
   4372                 arrayType.transferArraySizes(arraySizes);
   4373 
   4374                 TIntermAggregate* initList = new TIntermAggregate(EOpNull);
   4375 
   4376                 for (int offsetNum = 0; offsetNum < 4; ++offsetNum)
   4377                     initList->getSequence().push_back(argOffsets[offsetNum]);
   4378 
   4379                 argOffset = addConstructor(loc, initList, arrayType);
   4380             }
   4381 
   4382             // Add comparison value if we have one
   4383             if (argCmp != nullptr)
   4384                 txgather->getSequence().push_back(argCmp);
   4385 
   4386             // Add offset (either 1, or an array of 4) if we have one
   4387             if (argOffset != nullptr)
   4388                 txgather->getSequence().push_back(argOffset);
   4389 
   4390             // Add channel value if the sampler is not shadow
   4391             if (! argSamp->getType().getSampler().isShadow())
   4392                 txgather->getSequence().push_back(argChannel);
   4393 
   4394             txgather->setType(node->getType());
   4395             txgather->setLoc(loc);
   4396             node = txgather;
   4397 
   4398             break;
   4399         }
   4400 
   4401     case EOpMethodCalculateLevelOfDetail:
   4402     case EOpMethodCalculateLevelOfDetailUnclamped:
   4403         {
   4404             TIntermTyped* argTex    = argAggregate->getSequence()[0]->getAsTyped();
   4405             TIntermTyped* argSamp   = argAggregate->getSequence()[1]->getAsTyped();
   4406             TIntermTyped* argCoord  = argAggregate->getSequence()[2]->getAsTyped();
   4407 
   4408             TIntermAggregate* txquerylod = new TIntermAggregate(EOpTextureQueryLod);
   4409 
   4410             TIntermAggregate* txcombine = handleSamplerTextureCombine(loc, argTex, argSamp);
   4411             txquerylod->getSequence().push_back(txcombine);
   4412             txquerylod->getSequence().push_back(argCoord);
   4413 
   4414             TIntermTyped* lodComponent = intermediate.addConstantUnion(
   4415                 op == EOpMethodCalculateLevelOfDetail ? 0 : 1,
   4416                 loc, true);
   4417             TIntermTyped* lodComponentIdx = intermediate.addIndex(EOpIndexDirect, txquerylod, lodComponent, loc);
   4418             lodComponentIdx->setType(TType(EbtFloat, EvqTemporary, 1));
   4419             node = lodComponentIdx;
   4420 
   4421             break;
   4422         }
   4423 
   4424     case EOpMethodGetSamplePosition:
   4425         {
   4426             // TODO: this entire decomposition exists because there is not yet a way to query
   4427             // the sample position directly through SPIR-V.  Instead, we return fixed sample
   4428             // positions for common cases.  *** If the sample positions are set differently,
   4429             // this will be wrong. ***
   4430 
   4431             TIntermTyped* argTex     = argAggregate->getSequence()[0]->getAsTyped();
   4432             TIntermTyped* argSampIdx = argAggregate->getSequence()[1]->getAsTyped();
   4433 
   4434             TIntermAggregate* samplesQuery = new TIntermAggregate(EOpImageQuerySamples);
   4435             samplesQuery->getSequence().push_back(argTex);
   4436             samplesQuery->setType(TType(EbtUint, EvqTemporary, 1));
   4437             samplesQuery->setLoc(loc);
   4438 
   4439             TIntermAggregate* compoundStatement = nullptr;
   4440 
   4441             TVariable* outSampleCount = makeInternalVariable("@sampleCount", TType(EbtUint));
   4442             outSampleCount->getWritableType().getQualifier().makeTemporary();
   4443             TIntermTyped* compAssign = intermediate.addAssign(EOpAssign, intermediate.addSymbol(*outSampleCount, loc),
   4444                                                               samplesQuery, loc);
   4445             compoundStatement = intermediate.growAggregate(compoundStatement, compAssign);
   4446 
   4447             TIntermTyped* idxtest[4];
   4448 
   4449             // Create tests against 2, 4, 8, and 16 sample values
   4450             int count = 0;
   4451             for (int val = 2; val <= 16; val *= 2)
   4452                 idxtest[count++] =
   4453                     intermediate.addBinaryNode(EOpEqual,
   4454                                                intermediate.addSymbol(*outSampleCount, loc),
   4455                                                intermediate.addConstantUnion(val, loc),
   4456                                                loc, TType(EbtBool));
   4457 
   4458             const TOperator idxOp = (argSampIdx->getQualifier().storage == EvqConst) ? EOpIndexDirect : EOpIndexIndirect;
   4459 
   4460             // Create index ops into position arrays given sample index.
   4461             // TODO: should it be clamped?
   4462             TIntermTyped* index[4];
   4463             count = 0;
   4464             for (int val = 2; val <= 16; val *= 2) {
   4465                 index[count] = intermediate.addIndex(idxOp, getSamplePosArray(val), argSampIdx, loc);
   4466                 index[count++]->setType(TType(EbtFloat, EvqTemporary, 2));
   4467             }
   4468 
   4469             // Create expression as:
   4470             // (sampleCount == 2)  ? pos2[idx] :
   4471             // (sampleCount == 4)  ? pos4[idx] :
   4472             // (sampleCount == 8)  ? pos8[idx] :
   4473             // (sampleCount == 16) ? pos16[idx] : float2(0,0);
   4474             TIntermTyped* test =
   4475                 intermediate.addSelection(idxtest[0], index[0],
   4476                     intermediate.addSelection(idxtest[1], index[1],
   4477                         intermediate.addSelection(idxtest[2], index[2],
   4478                             intermediate.addSelection(idxtest[3], index[3],
   4479                                                       getSamplePosArray(1), loc), loc), loc), loc);
   4480 
   4481             compoundStatement = intermediate.growAggregate(compoundStatement, test);
   4482             compoundStatement->setOperator(EOpSequence);
   4483             compoundStatement->setLoc(loc);
   4484             compoundStatement->setType(TType(EbtFloat, EvqTemporary, 2));
   4485 
   4486             node = compoundStatement;
   4487 
   4488             break;
   4489         }
   4490 
   4491     case EOpSubpassLoad:
   4492         {
   4493             const TIntermTyped* argSubpass =
   4494                 argAggregate ? argAggregate->getSequence()[0]->getAsTyped() :
   4495                 arguments->getAsTyped();
   4496 
   4497             const TSampler& sampler = argSubpass->getType().getSampler();
   4498 
   4499             // subpass load: the multisample form is overloaded.  Here, we convert that to
   4500             // the EOpSubpassLoadMS opcode.
   4501             if (argAggregate != nullptr && argAggregate->getSequence().size() > 1)
   4502                 node->getAsOperator()->setOp(EOpSubpassLoadMS);
   4503 
   4504             node = convertReturn(node, sampler);
   4505 
   4506             break;
   4507         }
   4508 
   4509 
   4510     default:
   4511         break; // most pass through unchanged
   4512     }
   4513 }
   4514 
   4515 //
   4516 // Decompose geometry shader methods
   4517 //
   4518 void HlslParseContext::decomposeGeometryMethods(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
   4519 {
   4520     if (node == nullptr || !node->getAsOperator())
   4521         return;
   4522 
   4523     const TOperator op  = node->getAsOperator()->getOp();
   4524     const TIntermAggregate* argAggregate = arguments ? arguments->getAsAggregate() : nullptr;
   4525 
   4526     switch (op) {
   4527     case EOpMethodAppend:
   4528         if (argAggregate) {
   4529             // Don't emit these for non-GS stage, since we won't have the gsStreamOutput symbol.
   4530             if (language != EShLangGeometry) {
   4531                 node = nullptr;
   4532                 return;
   4533             }
   4534 
   4535             TIntermAggregate* sequence = nullptr;
   4536             TIntermAggregate* emit = new TIntermAggregate(EOpEmitVertex);
   4537 
   4538             emit->setLoc(loc);
   4539             emit->setType(TType(EbtVoid));
   4540 
   4541             TIntermTyped* data = argAggregate->getSequence()[1]->getAsTyped();
   4542 
   4543             // This will be patched in finalization during finalizeAppendMethods()
   4544             sequence = intermediate.growAggregate(sequence, data, loc);
   4545             sequence = intermediate.growAggregate(sequence, emit);
   4546 
   4547             sequence->setOperator(EOpSequence);
   4548             sequence->setLoc(loc);
   4549             sequence->setType(TType(EbtVoid));
   4550 
   4551             gsAppends.push_back({sequence, loc});
   4552 
   4553             node = sequence;
   4554         }
   4555         break;
   4556 
   4557     case EOpMethodRestartStrip:
   4558         {
   4559             // Don't emit these for non-GS stage, since we won't have the gsStreamOutput symbol.
   4560             if (language != EShLangGeometry) {
   4561                 node = nullptr;
   4562                 return;
   4563             }
   4564 
   4565             TIntermAggregate* cut = new TIntermAggregate(EOpEndPrimitive);
   4566             cut->setLoc(loc);
   4567             cut->setType(TType(EbtVoid));
   4568             node = cut;
   4569         }
   4570         break;
   4571 
   4572     default:
   4573         break; // most pass through unchanged
   4574     }
   4575 }
   4576 
   4577 //
   4578 // Optionally decompose intrinsics to AST opcodes.
   4579 //
   4580 void HlslParseContext::decomposeIntrinsic(const TSourceLoc& loc, TIntermTyped*& node, TIntermNode* arguments)
   4581 {
   4582     // Helper to find image data for image atomics:
   4583     // OpImageLoad(image[idx])
   4584     // We take the image load apart and add its params to the atomic op aggregate node
   4585     const auto imageAtomicParams = [this, &loc, &node](TIntermAggregate* atomic, TIntermTyped* load) {
   4586         TIntermAggregate* loadOp = load->getAsAggregate();
   4587         if (loadOp == nullptr) {
   4588             error(loc, "unknown image type in atomic operation", "", "");
   4589             node = nullptr;
   4590             return;
   4591         }
   4592 
   4593         atomic->getSequence().push_back(loadOp->getSequence()[0]);
   4594         atomic->getSequence().push_back(loadOp->getSequence()[1]);
   4595     };
   4596 
   4597     // Return true if this is an imageLoad, which we will change to an image atomic.
   4598     const auto isImageParam = [](TIntermTyped* image) -> bool {
   4599         TIntermAggregate* imageAggregate = image->getAsAggregate();
   4600         return imageAggregate != nullptr && imageAggregate->getOp() == EOpImageLoad;
   4601     };
   4602 
   4603     const auto lookupBuiltinVariable = [&](const char* name, TBuiltInVariable builtin, TType& type) -> TIntermTyped* {
   4604         TSymbol* symbol = symbolTable.find(name);
   4605         if (nullptr == symbol) {
   4606             type.getQualifier().builtIn = builtin;
   4607 
   4608             TVariable* variable = new TVariable(new TString(name), type);
   4609 
   4610             symbolTable.insert(*variable);
   4611 
   4612             symbol = symbolTable.find(name);
   4613             assert(symbol && "Inserted symbol could not be found!");
   4614         }
   4615 
   4616         return intermediate.addSymbol(*(symbol->getAsVariable()), loc);
   4617     };
   4618 
   4619     // HLSL intrinsics can be pass through to native AST opcodes, or decomposed here to existing AST
   4620     // opcodes for compatibility with existing software stacks.
   4621     static const bool decomposeHlslIntrinsics = true;
   4622 
   4623     if (!decomposeHlslIntrinsics || !node || !node->getAsOperator())
   4624         return;
   4625 
   4626     const TIntermAggregate* argAggregate = arguments ? arguments->getAsAggregate() : nullptr;
   4627     TIntermUnary* fnUnary = node->getAsUnaryNode();
   4628     const TOperator op  = node->getAsOperator()->getOp();
   4629 
   4630     switch (op) {
   4631     case EOpGenMul:
   4632         {
   4633             // mul(a,b) -> MatrixTimesMatrix, MatrixTimesVector, MatrixTimesScalar, VectorTimesScalar, Dot, Mul
   4634             // Since we are treating HLSL rows like GLSL columns (the first matrix indirection),
   4635             // we must reverse the operand order here.  Hence, arg0 gets sequence[1], etc.
   4636             TIntermTyped* arg0 = argAggregate->getSequence()[1]->getAsTyped();
   4637             TIntermTyped* arg1 = argAggregate->getSequence()[0]->getAsTyped();
   4638 
   4639             if (arg0->isVector() && arg1->isVector()) {  // vec * vec
   4640                 node->getAsAggregate()->setOperator(EOpDot);
   4641             } else {
   4642                 node = handleBinaryMath(loc, "mul", EOpMul, arg0, arg1);
   4643             }
   4644 
   4645             break;
   4646         }
   4647 
   4648     case EOpRcp:
   4649         {
   4650             // rcp(a) -> 1 / a
   4651             TIntermTyped* arg0 = fnUnary->getOperand();
   4652             TBasicType   type0 = arg0->getBasicType();
   4653             TIntermTyped* one  = intermediate.addConstantUnion(1, type0, loc, true);
   4654             node  = handleBinaryMath(loc, "rcp", EOpDiv, one, arg0);
   4655 
   4656             break;
   4657         }
   4658 
   4659     case EOpAny: // fall through
   4660     case EOpAll:
   4661         {
   4662             TIntermTyped* typedArg = arguments->getAsTyped();
   4663 
   4664             // HLSL allows float/etc types here, and the SPIR-V opcode requires a bool.
   4665             // We'll convert here.  Note that for efficiency, we could add a smarter
   4666             // decomposition for some type cases, e.g, maybe by decomposing a dot product.
   4667             if (typedArg->getType().getBasicType() != EbtBool) {
   4668                 const TType boolType(EbtBool, EvqTemporary,
   4669                                      typedArg->getVectorSize(),
   4670                                      typedArg->getMatrixCols(),
   4671                                      typedArg->getMatrixRows(),
   4672                                      typedArg->isVector());
   4673 
   4674                 typedArg = intermediate.addConversion(EOpConstructBool, boolType, typedArg);
   4675                 node->getAsUnaryNode()->setOperand(typedArg);
   4676             }
   4677 
   4678             break;
   4679         }
   4680 
   4681     case EOpSaturate:
   4682         {
   4683             // saturate(a) -> clamp(a,0,1)
   4684             TIntermTyped* arg0 = fnUnary->getOperand();
   4685             TBasicType   type0 = arg0->getBasicType();
   4686             TIntermAggregate* clamp = new TIntermAggregate(EOpClamp);
   4687 
   4688             clamp->getSequence().push_back(arg0);
   4689             clamp->getSequence().push_back(intermediate.addConstantUnion(0, type0, loc, true));
   4690             clamp->getSequence().push_back(intermediate.addConstantUnion(1, type0, loc, true));
   4691             clamp->setLoc(loc);
   4692             clamp->setType(node->getType());
   4693             clamp->getWritableType().getQualifier().makeTemporary();
   4694             node = clamp;
   4695 
   4696             break;
   4697         }
   4698 
   4699     case EOpSinCos:
   4700         {
   4701             // sincos(a,b,c) -> b = sin(a), c = cos(a)
   4702             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
   4703             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
   4704             TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();
   4705 
   4706             TIntermTyped* sinStatement = handleUnaryMath(loc, "sin", EOpSin, arg0);
   4707             TIntermTyped* cosStatement = handleUnaryMath(loc, "cos", EOpCos, arg0);
   4708             TIntermTyped* sinAssign    = intermediate.addAssign(EOpAssign, arg1, sinStatement, loc);
   4709             TIntermTyped* cosAssign    = intermediate.addAssign(EOpAssign, arg2, cosStatement, loc);
   4710 
   4711             TIntermAggregate* compoundStatement = intermediate.makeAggregate(sinAssign, loc);
   4712             compoundStatement = intermediate.growAggregate(compoundStatement, cosAssign);
   4713             compoundStatement->setOperator(EOpSequence);
   4714             compoundStatement->setLoc(loc);
   4715             compoundStatement->setType(TType(EbtVoid));
   4716 
   4717             node = compoundStatement;
   4718 
   4719             break;
   4720         }
   4721 
   4722     case EOpClip:
   4723         {
   4724             // clip(a) -> if (any(a<0)) discard;
   4725             TIntermTyped*  arg0 = fnUnary->getOperand();
   4726             TBasicType     type0 = arg0->getBasicType();
   4727             TIntermTyped*  compareNode = nullptr;
   4728 
   4729             // For non-scalars: per experiment with FXC compiler, discard if any component < 0.
   4730             if (!arg0->isScalar()) {
   4731                 // component-wise compare: a < 0
   4732                 TIntermAggregate* less = new TIntermAggregate(EOpLessThan);
   4733                 less->getSequence().push_back(arg0);
   4734                 less->setLoc(loc);
   4735 
   4736                 // make vec or mat of bool matching dimensions of input
   4737                 less->setType(TType(EbtBool, EvqTemporary,
   4738                                     arg0->getType().getVectorSize(),
   4739                                     arg0->getType().getMatrixCols(),
   4740                                     arg0->getType().getMatrixRows(),
   4741                                     arg0->getType().isVector()));
   4742 
   4743                 // calculate # of components for comparison const
   4744                 const int constComponentCount =
   4745                     std::max(arg0->getType().getVectorSize(), 1) *
   4746                     std::max(arg0->getType().getMatrixCols(), 1) *
   4747                     std::max(arg0->getType().getMatrixRows(), 1);
   4748 
   4749                 TConstUnion zero;
   4750                 if (arg0->getType().isIntegerDomain())
   4751                     zero.setDConst(0);
   4752                 else
   4753                     zero.setDConst(0.0);
   4754                 TConstUnionArray zeros(constComponentCount, zero);
   4755 
   4756                 less->getSequence().push_back(intermediate.addConstantUnion(zeros, arg0->getType(), loc, true));
   4757 
   4758                 compareNode = intermediate.addBuiltInFunctionCall(loc, EOpAny, true, less, TType(EbtBool));
   4759             } else {
   4760                 TIntermTyped* zero;
   4761                 if (arg0->getType().isIntegerDomain())
   4762                     zero = intermediate.addConstantUnion(0, loc, true);
   4763                 else
   4764                     zero = intermediate.addConstantUnion(0.0, type0, loc, true);
   4765                 compareNode = handleBinaryMath(loc, "clip", EOpLessThan, arg0, zero);
   4766             }
   4767 
   4768             TIntermBranch* killNode = intermediate.addBranch(EOpKill, loc);
   4769 
   4770             node = new TIntermSelection(compareNode, killNode, nullptr);
   4771             node->setLoc(loc);
   4772 
   4773             break;
   4774         }
   4775 
   4776     case EOpLog10:
   4777         {
   4778             // log10(a) -> log2(a) * 0.301029995663981  (== 1/log2(10))
   4779             TIntermTyped* arg0 = fnUnary->getOperand();
   4780             TIntermTyped* log2 = handleUnaryMath(loc, "log2", EOpLog2, arg0);
   4781             TIntermTyped* base = intermediate.addConstantUnion(0.301029995663981f, EbtFloat, loc, true);
   4782 
   4783             node  = handleBinaryMath(loc, "mul", EOpMul, log2, base);
   4784 
   4785             break;
   4786         }
   4787 
   4788     case EOpDst:
   4789         {
   4790             // dest.x = 1;
   4791             // dest.y = src0.y * src1.y;
   4792             // dest.z = src0.z;
   4793             // dest.w = src1.w;
   4794 
   4795             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
   4796             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
   4797 
   4798             TIntermTyped* y = intermediate.addConstantUnion(1, loc, true);
   4799             TIntermTyped* z = intermediate.addConstantUnion(2, loc, true);
   4800             TIntermTyped* w = intermediate.addConstantUnion(3, loc, true);
   4801 
   4802             TIntermTyped* src0y = intermediate.addIndex(EOpIndexDirect, arg0, y, loc);
   4803             TIntermTyped* src1y = intermediate.addIndex(EOpIndexDirect, arg1, y, loc);
   4804             TIntermTyped* src0z = intermediate.addIndex(EOpIndexDirect, arg0, z, loc);
   4805             TIntermTyped* src1w = intermediate.addIndex(EOpIndexDirect, arg1, w, loc);
   4806 
   4807             TIntermAggregate* dst = new TIntermAggregate(EOpConstructVec4);
   4808 
   4809             dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
   4810             dst->getSequence().push_back(handleBinaryMath(loc, "mul", EOpMul, src0y, src1y));
   4811             dst->getSequence().push_back(src0z);
   4812             dst->getSequence().push_back(src1w);
   4813             dst->setType(TType(EbtFloat, EvqTemporary, 4));
   4814             dst->setLoc(loc);
   4815             node = dst;
   4816 
   4817             break;
   4818         }
   4819 
   4820     case EOpInterlockedAdd: // optional last argument (if present) is assigned from return value
   4821     case EOpInterlockedMin: // ...
   4822     case EOpInterlockedMax: // ...
   4823     case EOpInterlockedAnd: // ...
   4824     case EOpInterlockedOr:  // ...
   4825     case EOpInterlockedXor: // ...
   4826     case EOpInterlockedExchange: // always has output arg
   4827         {
   4828             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // dest
   4829             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // value
   4830             TIntermTyped* arg2 = nullptr;
   4831 
   4832             if (argAggregate->getSequence().size() > 2)
   4833                 arg2 = argAggregate->getSequence()[2]->getAsTyped();
   4834 
   4835             const bool isImage = isImageParam(arg0);
   4836             const TOperator atomicOp = mapAtomicOp(loc, op, isImage);
   4837             TIntermAggregate* atomic = new TIntermAggregate(atomicOp);
   4838             atomic->setType(arg0->getType());
   4839             atomic->getWritableType().getQualifier().makeTemporary();
   4840             atomic->setLoc(loc);
   4841 
   4842             if (isImage) {
   4843                 // orig_value = imageAtomicOp(image, loc, data)
   4844                 imageAtomicParams(atomic, arg0);
   4845                 atomic->getSequence().push_back(arg1);
   4846 
   4847                 if (argAggregate->getSequence().size() > 2) {
   4848                     node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
   4849                 } else {
   4850                     node = atomic; // no assignment needed, as there was no out var.
   4851                 }
   4852             } else {
   4853                 // Normal memory variable:
   4854                 // arg0 = mem, arg1 = data, arg2(optional,out) = orig_value
   4855                 if (argAggregate->getSequence().size() > 2) {
   4856                     // optional output param is present.  return value goes to arg2.
   4857                     atomic->getSequence().push_back(arg0);
   4858                     atomic->getSequence().push_back(arg1);
   4859 
   4860                     node = intermediate.addAssign(EOpAssign, arg2, atomic, loc);
   4861                 } else {
   4862                     // Set the matching operator.  Since output is absent, this is all we need to do.
   4863                     node->getAsAggregate()->setOperator(atomicOp);
   4864                     node->setType(atomic->getType());
   4865                 }
   4866             }
   4867 
   4868             break;
   4869         }
   4870 
   4871     case EOpInterlockedCompareExchange:
   4872         {
   4873             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // dest
   4874             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // cmp
   4875             TIntermTyped* arg2 = argAggregate->getSequence()[2]->getAsTyped();  // value
   4876             TIntermTyped* arg3 = argAggregate->getSequence()[3]->getAsTyped();  // orig
   4877 
   4878             const bool isImage = isImageParam(arg0);
   4879             TIntermAggregate* atomic = new TIntermAggregate(mapAtomicOp(loc, op, isImage));
   4880             atomic->setLoc(loc);
   4881             atomic->setType(arg2->getType());
   4882             atomic->getWritableType().getQualifier().makeTemporary();
   4883 
   4884             if (isImage) {
   4885                 imageAtomicParams(atomic, arg0);
   4886             } else {
   4887                 atomic->getSequence().push_back(arg0);
   4888             }
   4889 
   4890             atomic->getSequence().push_back(arg1);
   4891             atomic->getSequence().push_back(arg2);
   4892             node = intermediate.addAssign(EOpAssign, arg3, atomic, loc);
   4893 
   4894             break;
   4895         }
   4896 
   4897     case EOpEvaluateAttributeSnapped:
   4898         {
   4899             // SPIR-V InterpolateAtOffset uses float vec2 offset in pixels
   4900             // HLSL uses int2 offset on a 16x16 grid in [-8..7] on x & y:
   4901             //   iU = (iU<<28)>>28
   4902             //   fU = ((float)iU)/16
   4903             // Targets might handle this natively, in which case they can disable
   4904             // decompositions.
   4905 
   4906             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();  // value
   4907             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();  // offset
   4908 
   4909             TIntermTyped* i28 = intermediate.addConstantUnion(28, loc, true);
   4910             TIntermTyped* iU = handleBinaryMath(loc, ">>", EOpRightShift,
   4911                                                 handleBinaryMath(loc, "<<", EOpLeftShift, arg1, i28),
   4912                                                 i28);
   4913 
   4914             TIntermTyped* recip16 = intermediate.addConstantUnion((1.0/16.0), EbtFloat, loc, true);
   4915             TIntermTyped* floatOffset = handleBinaryMath(loc, "mul", EOpMul,
   4916                                                          intermediate.addConversion(EOpConstructFloat,
   4917                                                                                     TType(EbtFloat, EvqTemporary, 2), iU),
   4918                                                          recip16);
   4919 
   4920             TIntermAggregate* interp = new TIntermAggregate(EOpInterpolateAtOffset);
   4921             interp->getSequence().push_back(arg0);
   4922             interp->getSequence().push_back(floatOffset);
   4923             interp->setLoc(loc);
   4924             interp->setType(arg0->getType());
   4925             interp->getWritableType().getQualifier().makeTemporary();
   4926 
   4927             node = interp;
   4928 
   4929             break;
   4930         }
   4931 
   4932     case EOpLit:
   4933         {
   4934             TIntermTyped* n_dot_l = argAggregate->getSequence()[0]->getAsTyped();
   4935             TIntermTyped* n_dot_h = argAggregate->getSequence()[1]->getAsTyped();
   4936             TIntermTyped* m = argAggregate->getSequence()[2]->getAsTyped();
   4937 
   4938             TIntermAggregate* dst = new TIntermAggregate(EOpConstructVec4);
   4939 
   4940             // Ambient
   4941             dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
   4942 
   4943             // Diffuse:
   4944             TIntermTyped* zero = intermediate.addConstantUnion(0.0, EbtFloat, loc, true);
   4945             TIntermAggregate* diffuse = new TIntermAggregate(EOpMax);
   4946             diffuse->getSequence().push_back(n_dot_l);
   4947             diffuse->getSequence().push_back(zero);
   4948             diffuse->setLoc(loc);
   4949             diffuse->setType(TType(EbtFloat));
   4950             dst->getSequence().push_back(diffuse);
   4951 
   4952             // Specular:
   4953             TIntermAggregate* min_ndot = new TIntermAggregate(EOpMin);
   4954             min_ndot->getSequence().push_back(n_dot_l);
   4955             min_ndot->getSequence().push_back(n_dot_h);
   4956             min_ndot->setLoc(loc);
   4957             min_ndot->setType(TType(EbtFloat));
   4958 
   4959             TIntermTyped* compare = handleBinaryMath(loc, "<", EOpLessThan, min_ndot, zero);
   4960             TIntermTyped* n_dot_h_m = handleBinaryMath(loc, "mul", EOpMul, n_dot_h, m);  // n_dot_h * m
   4961 
   4962             dst->getSequence().push_back(intermediate.addSelection(compare, zero, n_dot_h_m, loc));
   4963 
   4964             // One:
   4965             dst->getSequence().push_back(intermediate.addConstantUnion(1.0, EbtFloat, loc, true));
   4966 
   4967             dst->setLoc(loc);
   4968             dst->setType(TType(EbtFloat, EvqTemporary, 4));
   4969             node = dst;
   4970             break;
   4971         }
   4972 
   4973     case EOpAsDouble:
   4974         {
   4975             // asdouble accepts two 32 bit ints.  we can use EOpUint64BitsToDouble, but must
   4976             // first construct a uint64.
   4977             TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
   4978             TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
   4979 
   4980             if (arg0->getType().isVector()) { // TODO: ...
   4981                 error(loc, "double2 conversion not implemented", "asdouble", "");
   4982                 break;
   4983             }
   4984 
   4985             TIntermAggregate* uint64 = new TIntermAggregate(EOpConstructUVec2);
   4986 
   4987             uint64->getSequence().push_back(arg0);
   4988             uint64->getSequence().push_back(arg1);
   4989             uint64->setType(TType(EbtUint, EvqTemporary, 2));  // convert 2 uints to a uint2
   4990             uint64->setLoc(loc);
   4991 
   4992             // bitcast uint2 to a double
   4993             TIntermTyped* convert = new TIntermUnary(EOpUint64BitsToDouble);
   4994             convert->getAsUnaryNode()->setOperand(uint64);
   4995             convert->setLoc(loc);
   4996             convert->setType(TType(EbtDouble, EvqTemporary));
   4997             node = convert;
   4998 
   4999             break;
   5000         }
   5001 
   5002     case EOpF16tof32:
   5003         {
   5004             // input uvecN with low 16 bits of each component holding a float16.  convert to float32.
   5005             TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();
   5006             TIntermTyped* zero = intermediate.addConstantUnion(0, loc, true);
   5007             const int vecSize = argValue->getType().getVectorSize();
   5008 
   5009             TOperator constructOp = EOpNull;
   5010             switch (vecSize) {
   5011             case 1: constructOp = EOpNull;          break; // direct use, no construct needed
   5012             case 2: constructOp = EOpConstructVec2; break;
   5013             case 3: constructOp = EOpConstructVec3; break;
   5014             case 4: constructOp = EOpConstructVec4; break;
   5015             default: assert(0); break;
   5016             }
   5017 
   5018             // For scalar case, we don't need to construct another type.
   5019             TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;
   5020 
   5021             if (result) {
   5022                 result->setType(TType(EbtFloat, EvqTemporary, vecSize));
   5023                 result->setLoc(loc);
   5024             }
   5025 
   5026             for (int idx = 0; idx < vecSize; ++idx) {
   5027                 TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
   5028                 TIntermTyped* component = argValue->getType().isVector() ?
   5029                     intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;
   5030 
   5031                 if (component != argValue)
   5032                     component->setType(TType(argValue->getBasicType(), EvqTemporary));
   5033 
   5034                 TIntermTyped* unpackOp  = new TIntermUnary(EOpUnpackHalf2x16);
   5035                 unpackOp->setType(TType(EbtFloat, EvqTemporary, 2));
   5036                 unpackOp->getAsUnaryNode()->setOperand(component);
   5037                 unpackOp->setLoc(loc);
   5038 
   5039                 TIntermTyped* lowOrder  = intermediate.addIndex(EOpIndexDirect, unpackOp, zero, loc);
   5040 
   5041                 if (result != nullptr) {
   5042                     result->getSequence().push_back(lowOrder);
   5043                     node = result;
   5044                 } else {
   5045                     node = lowOrder;
   5046                 }
   5047             }
   5048 
   5049             break;
   5050         }
   5051 
   5052     case EOpF32tof16:
   5053         {
   5054             // input floatN converted to 16 bit float in low order bits of each component of uintN
   5055             TIntermTyped* argValue = node->getAsUnaryNode()->getOperand();
   5056 
   5057             TIntermTyped* zero = intermediate.addConstantUnion(0.0, EbtFloat, loc, true);
   5058             const int vecSize = argValue->getType().getVectorSize();
   5059 
   5060             TOperator constructOp = EOpNull;
   5061             switch (vecSize) {
   5062             case 1: constructOp = EOpNull;           break; // direct use, no construct needed
   5063             case 2: constructOp = EOpConstructUVec2; break;
   5064             case 3: constructOp = EOpConstructUVec3; break;
   5065             case 4: constructOp = EOpConstructUVec4; break;
   5066             default: assert(0); break;
   5067             }
   5068 
   5069             // For scalar case, we don't need to construct another type.
   5070             TIntermAggregate* result = (vecSize > 1) ? new TIntermAggregate(constructOp) : nullptr;
   5071 
   5072             if (result) {
   5073                 result->setType(TType(EbtUint, EvqTemporary, vecSize));
   5074                 result->setLoc(loc);
   5075             }
   5076 
   5077             for (int idx = 0; idx < vecSize; ++idx) {
   5078                 TIntermTyped* idxConst = intermediate.addConstantUnion(idx, loc, true);
   5079                 TIntermTyped* component = argValue->getType().isVector() ?
   5080                     intermediate.addIndex(EOpIndexDirect, argValue, idxConst, loc) : argValue;
   5081 
   5082                 if (component != argValue)
   5083                     component->setType(TType(argValue->getBasicType(), EvqTemporary));
   5084 
   5085                 TIntermAggregate* vec2ComponentAndZero = new TIntermAggregate(EOpConstructVec2);
   5086                 vec2ComponentAndZero->getSequence().push_back(component);
   5087                 vec2ComponentAndZero->getSequence().push_back(zero);
   5088                 vec2ComponentAndZero->setType(TType(EbtFloat, EvqTemporary, 2));
   5089                 vec2ComponentAndZero->setLoc(loc);
   5090 
   5091                 TIntermTyped* packOp = new TIntermUnary(EOpPackHalf2x16);
   5092                 packOp->getAsUnaryNode()->setOperand(vec2ComponentAndZero);
   5093                 packOp->setLoc(loc);
   5094                 packOp->setType(TType(EbtUint, EvqTemporary));
   5095 
   5096                 if (result != nullptr) {
   5097                     result->getSequence().push_back(packOp);
   5098                     node = result;
   5099                 } else {
   5100                     node = packOp;
   5101                 }
   5102             }
   5103 
   5104             break;
   5105         }
   5106 
   5107     case EOpD3DCOLORtoUBYTE4:
   5108         {
   5109             // ivec4 ( x.zyxw * 255.001953 );
   5110             TIntermTyped* arg0 = node->getAsUnaryNode()->getOperand();
   5111             TSwizzleSelectors<TVectorSelector> selectors;
   5112             selectors.push_back(2);
   5113             selectors.push_back(1);
   5114             selectors.push_back(0);
   5115             selectors.push_back(3);
   5116             TIntermTyped* swizzleIdx = intermediate.addSwizzle(selectors, loc);
   5117             TIntermTyped* swizzled = intermediate.addIndex(EOpVectorSwizzle, arg0, swizzleIdx, loc);
   5118             swizzled->setType(arg0->getType());
   5119             swizzled->getWritableType().getQualifier().makeTemporary();
   5120 
   5121             TIntermTyped* conversion = intermediate.addConstantUnion(255.001953f, EbtFloat, loc, true);
   5122             TIntermTyped* rangeConverted = handleBinaryMath(loc, "mul", EOpMul, conversion, swizzled);
   5123             rangeConverted->setType(arg0->getType());
   5124             rangeConverted->getWritableType().getQualifier().makeTemporary();
   5125 
   5126             node = intermediate.addConversion(EOpConstructInt, TType(EbtInt, EvqTemporary, 4), rangeConverted);
   5127             node->setLoc(loc);
   5128             node->setType(TType(EbtInt, EvqTemporary, 4));
   5129             break;
   5130         }
   5131 
   5132     case EOpIsFinite:
   5133         {
   5134             // Since OPIsFinite in SPIR-V is only supported with the Kernel capability, we translate
   5135             // it to !isnan && !isinf
   5136 
   5137             TIntermTyped* arg0 = node->getAsUnaryNode()->getOperand();
   5138 
   5139             // We'll make a temporary in case the RHS is cmoplex
   5140             TVariable* tempArg = makeInternalVariable("@finitetmp", arg0->getType());
   5141             tempArg->getWritableType().getQualifier().makeTemporary();
   5142 
   5143             TIntermTyped* tmpArgAssign = intermediate.addAssign(EOpAssign,
   5144                                                                 intermediate.addSymbol(*tempArg, loc),
   5145                                                                 arg0, loc);
   5146 
   5147             TIntermAggregate* compoundStatement = intermediate.makeAggregate(tmpArgAssign, loc);
   5148 
   5149             const TType boolType(EbtBool, EvqTemporary, arg0->getVectorSize(), arg0->getMatrixCols(),
   5150                                  arg0->getMatrixRows());
   5151 
   5152             TIntermTyped* isnan = handleUnaryMath(loc, "isnan", EOpIsNan, intermediate.addSymbol(*tempArg, loc));
   5153             isnan->setType(boolType);
   5154 
   5155             TIntermTyped* notnan = handleUnaryMath(loc, "!", EOpLogicalNot, isnan);
   5156             notnan->setType(boolType);
   5157 
   5158             TIntermTyped* isinf = handleUnaryMath(loc, "isinf", EOpIsInf, intermediate.addSymbol(*tempArg, loc));
   5159             isinf->setType(boolType);
   5160 
   5161             TIntermTyped* notinf = handleUnaryMath(loc, "!", EOpLogicalNot, isinf);
   5162             notinf->setType(boolType);
   5163 
   5164             TIntermTyped* andNode = handleBinaryMath(loc, "and", EOpLogicalAnd, notnan, notinf);
   5165             andNode->setType(boolType);
   5166 
   5167             compoundStatement = intermediate.growAggregate(compoundStatement, andNode);
   5168             compoundStatement->setOperator(EOpSequence);
   5169             compoundStatement->setLoc(loc);
   5170             compoundStatement->setType(boolType);
   5171 
   5172             node = compoundStatement;
   5173 
   5174             break;
   5175         }
   5176     case EOpWaveGetLaneCount:
   5177         {
   5178             // Mapped to gl_SubgroupSize builtin (We preprend @ to the symbol
   5179             // so that it inhabits the symbol table, but has a user-invalid name
   5180             // in-case some source HLSL defined the symbol also).
   5181             TType type(EbtUint, EvqVaryingIn);
   5182             node = lookupBuiltinVariable("@gl_SubgroupSize", EbvSubgroupSize2, type);
   5183             break;
   5184         }
   5185     case EOpWaveGetLaneIndex:
   5186         {
   5187             // Mapped to gl_SubgroupInvocationID builtin (We preprend @ to the
   5188             // symbol so that it inhabits the symbol table, but has a
   5189             // user-invalid name in-case some source HLSL defined the symbol
   5190             // also).
   5191             TType type(EbtUint, EvqVaryingIn);
   5192             node = lookupBuiltinVariable("@gl_SubgroupInvocationID", EbvSubgroupInvocation2, type);
   5193             break;
   5194         }
   5195     case EOpWaveActiveCountBits:
   5196         {
   5197             // Mapped to subgroupBallotBitCount(subgroupBallot()) builtin
   5198 
   5199             // uvec4 type.
   5200             TType uvec4Type(EbtUint, EvqTemporary, 4);
   5201 
   5202             // Get the uvec4 return from subgroupBallot().
   5203             TIntermTyped* res = intermediate.addBuiltInFunctionCall(loc,
   5204                 EOpSubgroupBallot, true, arguments, uvec4Type);
   5205 
   5206             // uint type.
   5207             TType uintType(EbtUint, EvqTemporary);
   5208 
   5209             node = intermediate.addBuiltInFunctionCall(loc,
   5210                 EOpSubgroupBallotBitCount, true, res, uintType);
   5211 
   5212             break;
   5213         }
   5214     case EOpWavePrefixCountBits:
   5215         {
   5216             // Mapped to subgroupBallotInclusiveBitCount(subgroupBallot())
   5217             // builtin
   5218 
   5219             // uvec4 type.
   5220             TType uvec4Type(EbtUint, EvqTemporary, 4);
   5221 
   5222             // Get the uvec4 return from subgroupBallot().
   5223             TIntermTyped* res = intermediate.addBuiltInFunctionCall(loc,
   5224                 EOpSubgroupBallot, true, arguments, uvec4Type);
   5225 
   5226             // uint type.
   5227             TType uintType(EbtUint, EvqTemporary);
   5228 
   5229             node = intermediate.addBuiltInFunctionCall(loc,
   5230                 EOpSubgroupBallotInclusiveBitCount, true, res, uintType);
   5231 
   5232             break;
   5233         }
   5234 
   5235     default:
   5236         break; // most pass through unchanged
   5237     }
   5238 }
   5239 
   5240 //
   5241 // Handle seeing function call syntax in the grammar, which could be any of
   5242 //  - .length() method
   5243 //  - constructor
   5244 //  - a call to a built-in function mapped to an operator
   5245 //  - a call to a built-in function that will remain a function call (e.g., texturing)
   5246 //  - user function
   5247 //  - subroutine call (not implemented yet)
   5248 //
   5249 TIntermTyped* HlslParseContext::handleFunctionCall(const TSourceLoc& loc, TFunction* function, TIntermTyped* arguments)
   5250 {
   5251     TIntermTyped* result = nullptr;
   5252 
   5253     TOperator op = function->getBuiltInOp();
   5254     if (op != EOpNull) {
   5255         //
   5256         // Then this should be a constructor.
   5257         // Don't go through the symbol table for constructors.
   5258         // Their parameters will be verified algorithmically.
   5259         //
   5260         TType type(EbtVoid);  // use this to get the type back
   5261         if (! constructorError(loc, arguments, *function, op, type)) {
   5262             //
   5263             // It's a constructor, of type 'type'.
   5264             //
   5265             result = handleConstructor(loc, arguments, type);
   5266             if (result == nullptr) {
   5267                 error(loc, "cannot construct with these arguments", type.getCompleteString().c_str(), "");
   5268                 return nullptr;
   5269             }
   5270         }
   5271     } else {
   5272         //
   5273         // Find it in the symbol table.
   5274         //
   5275         const TFunction* fnCandidate = nullptr;
   5276         bool builtIn = false;
   5277         int thisDepth = 0;
   5278 
   5279         // For mat mul, the situation is unusual: we have to compare vector sizes to mat row or col sizes,
   5280         // and clamp the opposite arg.  Since that's complex, we farm it off to a separate method.
   5281         // It doesn't naturally fall out of processing an argument at a time in isolation.
   5282         if (function->getName() == "mul")
   5283             addGenMulArgumentConversion(loc, *function, arguments);
   5284 
   5285         TIntermAggregate* aggregate = arguments ? arguments->getAsAggregate() : nullptr;
   5286 
   5287         // TODO: this needs improvement: there's no way at present to look up a signature in
   5288         // the symbol table for an arbitrary type.  This is a temporary hack until that ability exists.
   5289         // It will have false positives, since it doesn't check arg counts or types.
   5290         if (arguments) {
   5291             // Check if first argument is struct buffer type.  It may be an aggregate or a symbol, so we
   5292             // look for either case.
   5293 
   5294             TIntermTyped* arg0 = nullptr;
   5295 
   5296             if (aggregate && aggregate->getSequence().size() > 0)
   5297                 arg0 = aggregate->getSequence()[0]->getAsTyped();
   5298             else if (arguments->getAsSymbolNode())
   5299                 arg0 = arguments->getAsSymbolNode();
   5300 
   5301             if (arg0 != nullptr && isStructBufferType(arg0->getType())) {
   5302                 static const int methodPrefixSize = sizeof(BUILTIN_PREFIX)-1;
   5303 
   5304                 if (function->getName().length() > methodPrefixSize &&
   5305                     isStructBufferMethod(function->getName().substr(methodPrefixSize))) {
   5306                     const TString mangle = function->getName() + "(";
   5307                     TSymbol* symbol = symbolTable.find(mangle, &builtIn);
   5308 
   5309                     if (symbol)
   5310                         fnCandidate = symbol->getAsFunction();
   5311                 }
   5312             }
   5313         }
   5314 
   5315         if (fnCandidate == nullptr)
   5316             fnCandidate = findFunction(loc, *function, builtIn, thisDepth, arguments);
   5317 
   5318         if (fnCandidate) {
   5319             // This is a declared function that might map to
   5320             //  - a built-in operator,
   5321             //  - a built-in function not mapped to an operator, or
   5322             //  - a user function.
   5323 
   5324             // Error check for a function requiring specific extensions present.
   5325             if (builtIn && fnCandidate->getNumExtensions())
   5326                 requireExtensions(loc, fnCandidate->getNumExtensions(), fnCandidate->getExtensions(),
   5327                                   fnCandidate->getName().c_str());
   5328 
   5329             // turn an implicit member-function resolution into an explicit call
   5330             TString callerName;
   5331             if (thisDepth == 0)
   5332                 callerName = fnCandidate->getMangledName();
   5333             else {
   5334                 // get the explicit (full) name of the function
   5335                 callerName = currentTypePrefix[currentTypePrefix.size() - thisDepth];
   5336                 callerName += fnCandidate->getMangledName();
   5337                 // insert the implicit calling argument
   5338                 pushFrontArguments(intermediate.addSymbol(*getImplicitThis(thisDepth)), arguments);
   5339             }
   5340 
   5341             // Convert 'in' arguments, so that types match.
   5342             // However, skip those that need expansion, that is covered next.
   5343             if (arguments)
   5344                 addInputArgumentConversions(*fnCandidate, arguments);
   5345 
   5346             // Expand arguments.  Some arguments must physically expand to a different set
   5347             // than what the shader declared and passes.
   5348             if (arguments && !builtIn)
   5349                 expandArguments(loc, *fnCandidate, arguments);
   5350 
   5351             // Expansion may have changed the form of arguments
   5352             aggregate = arguments ? arguments->getAsAggregate() : nullptr;
   5353 
   5354             op = fnCandidate->getBuiltInOp();
   5355             if (builtIn && op != EOpNull) {
   5356                 // A function call mapped to a built-in operation.
   5357                 result = intermediate.addBuiltInFunctionCall(loc, op, fnCandidate->getParamCount() == 1, arguments,
   5358                                                              fnCandidate->getType());
   5359                 if (result == nullptr)  {
   5360                     error(arguments->getLoc(), " wrong operand type", "Internal Error",
   5361                         "built in unary operator function.  Type: %s",
   5362                         static_cast<TIntermTyped*>(arguments)->getCompleteString().c_str());
   5363                 } else if (result->getAsOperator()) {
   5364                     builtInOpCheck(loc, *fnCandidate, *result->getAsOperator());
   5365                 }
   5366             } else {
   5367                 // This is a function call not mapped to built-in operator.
   5368                 // It could still be a built-in function, but only if PureOperatorBuiltins == false.
   5369                 result = intermediate.setAggregateOperator(arguments, EOpFunctionCall, fnCandidate->getType(), loc);
   5370                 TIntermAggregate* call = result->getAsAggregate();
   5371                 call->setName(callerName);
   5372 
   5373                 // this is how we know whether the given function is a built-in function or a user-defined function
   5374                 // if builtIn == false, it's a userDefined -> could be an overloaded built-in function also
   5375                 // if builtIn == true, it's definitely a built-in function with EOpNull
   5376                 if (! builtIn) {
   5377                     call->setUserDefined();
   5378                     intermediate.addToCallGraph(infoSink, currentCaller, callerName);
   5379                 }
   5380             }
   5381 
   5382             // for decompositions, since we want to operate on the function node, not the aggregate holding
   5383             // output conversions.
   5384             const TIntermTyped* fnNode = result;
   5385 
   5386             decomposeStructBufferMethods(loc, result, arguments); // HLSL->AST struct buffer method decompositions
   5387             decomposeIntrinsic(loc, result, arguments);           // HLSL->AST intrinsic decompositions
   5388             decomposeSampleMethods(loc, result, arguments);       // HLSL->AST sample method decompositions
   5389             decomposeGeometryMethods(loc, result, arguments);     // HLSL->AST geometry method decompositions
   5390 
   5391             // Create the qualifier list, carried in the AST for the call.
   5392             // Because some arguments expand to multiple arguments, the qualifier list will
   5393             // be longer than the formal parameter list.
   5394             if (result == fnNode && result->getAsAggregate()) {
   5395                 TQualifierList& qualifierList = result->getAsAggregate()->getQualifierList();
   5396                 for (int i = 0; i < fnCandidate->getParamCount(); ++i) {
   5397                     TStorageQualifier qual = (*fnCandidate)[i].type->getQualifier().storage;
   5398                     if (hasStructBuffCounter(*(*fnCandidate)[i].type)) {
   5399                         // add buffer and counter buffer argument qualifier
   5400                         qualifierList.push_back(qual);
   5401                         qualifierList.push_back(qual);
   5402                     } else if (shouldFlatten(*(*fnCandidate)[i].type, (*fnCandidate)[i].type->getQualifier().storage,
   5403                                              true)) {
   5404                         // add structure member expansion
   5405                         for (int memb = 0; memb < (int)(*fnCandidate)[i].type->getStruct()->size(); ++memb)
   5406                             qualifierList.push_back(qual);
   5407                     } else {
   5408                         // Normal 1:1 case
   5409                         qualifierList.push_back(qual);
   5410                     }
   5411                 }
   5412             }
   5413 
   5414             // Convert 'out' arguments.  If it was a constant folded built-in, it won't be an aggregate anymore.
   5415             // Built-ins with a single argument aren't called with an aggregate, but they also don't have an output.
   5416             // Also, build the qualifier list for user function calls, which are always called with an aggregate.
   5417             // We don't do this is if there has been a decomposition, which will have added its own conversions
   5418             // for output parameters.
   5419             if (result == fnNode && result->getAsAggregate())
   5420                 result = addOutputArgumentConversions(*fnCandidate, *result->getAsOperator());
   5421         }
   5422     }
   5423 
   5424     // generic error recovery
   5425     // TODO: simplification: localize all the error recoveries that look like this, and taking type into account to
   5426     //       reduce cascades
   5427     if (result == nullptr)
   5428         result = intermediate.addConstantUnion(0.0, EbtFloat, loc);
   5429 
   5430     return result;
   5431 }
   5432 
   5433 // An initial argument list is difficult: it can be null, or a single node,
   5434 // or an aggregate if more than one argument.  Add one to the front, maintaining
   5435 // this lack of uniformity.
   5436 void HlslParseContext::pushFrontArguments(TIntermTyped* front, TIntermTyped*& arguments)
   5437 {
   5438     if (arguments == nullptr)
   5439         arguments = front;
   5440     else if (arguments->getAsAggregate() != nullptr)
   5441         arguments->getAsAggregate()->getSequence().insert(arguments->getAsAggregate()->getSequence().begin(), front);
   5442     else
   5443         arguments = intermediate.growAggregate(front, arguments);
   5444 }
   5445 
   5446 //
   5447 // HLSL allows mismatched dimensions on vec*mat, mat*vec, vec*vec, and mat*mat.  This is a
   5448 // situation not well suited to resolution in intrinsic selection, but we can do so here, since we
   5449 // can look at both arguments insert explicit shape changes if required.
   5450 //
   5451 void HlslParseContext::addGenMulArgumentConversion(const TSourceLoc& loc, TFunction& call, TIntermTyped*& args)
   5452 {
   5453     TIntermAggregate* argAggregate = args ? args->getAsAggregate() : nullptr;
   5454 
   5455     if (argAggregate == nullptr || argAggregate->getSequence().size() != 2) {
   5456         // It really ought to have two arguments.
   5457         error(loc, "expected: mul arguments", "", "");
   5458         return;
   5459     }
   5460 
   5461     TIntermTyped* arg0 = argAggregate->getSequence()[0]->getAsTyped();
   5462     TIntermTyped* arg1 = argAggregate->getSequence()[1]->getAsTyped();
   5463 
   5464     if (arg0->isVector() && arg1->isVector()) {
   5465         // For:
   5466         //    vec * vec: it's handled during intrinsic selection, so while we could do it here,
   5467         //               we can also ignore it, which is easier.
   5468     } else if (arg0->isVector() && arg1->isMatrix()) {
   5469         // vec * mat: we clamp the vec if the mat col is smaller, else clamp the mat col.
   5470         if (arg0->getVectorSize() < arg1->getMatrixCols()) {
   5471             // vec is smaller, so truncate larger mat dimension
   5472             const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
   5473                                   0, arg0->getVectorSize(), arg1->getMatrixRows());
   5474             arg1 = addConstructor(loc, arg1, truncType);
   5475         } else if (arg0->getVectorSize() > arg1->getMatrixCols()) {
   5476             // vec is larger, so truncate vec to mat size
   5477             const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
   5478                                   arg1->getMatrixCols());
   5479             arg0 = addConstructor(loc, arg0, truncType);
   5480         }
   5481     } else if (arg0->isMatrix() && arg1->isVector()) {
   5482         // mat * vec: we clamp the vec if the mat col is smaller, else clamp the mat col.
   5483         if (arg1->getVectorSize() < arg0->getMatrixRows()) {
   5484             // vec is smaller, so truncate larger mat dimension
   5485             const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
   5486                                   0, arg0->getMatrixCols(), arg1->getVectorSize());
   5487             arg0 = addConstructor(loc, arg0, truncType);
   5488         } else if (arg1->getVectorSize() > arg0->getMatrixRows()) {
   5489             // vec is larger, so truncate vec to mat size
   5490             const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
   5491                                   arg0->getMatrixRows());
   5492             arg1 = addConstructor(loc, arg1, truncType);
   5493         }
   5494     } else if (arg0->isMatrix() && arg1->isMatrix()) {
   5495         // mat * mat: we clamp the smaller inner dimension to match the other matrix size.
   5496         // Remember, HLSL Mrc = GLSL/SPIRV Mcr.
   5497         if (arg0->getMatrixRows() > arg1->getMatrixCols()) {
   5498             const TType truncType(arg0->getBasicType(), arg0->getQualifier().storage, arg0->getQualifier().precision,
   5499                                   0, arg0->getMatrixCols(), arg1->getMatrixCols());
   5500             arg0 = addConstructor(loc, arg0, truncType);
   5501         } else if (arg0->getMatrixRows() < arg1->getMatrixCols()) {
   5502             const TType truncType(arg1->getBasicType(), arg1->getQualifier().storage, arg1->getQualifier().precision,
   5503                                   0, arg0->getMatrixRows(), arg1->getMatrixRows());
   5504             arg1 = addConstructor(loc, arg1, truncType);
   5505         }
   5506     } else {
   5507         // It's something with scalars: we'll just leave it alone.  Function selection will handle it
   5508         // downstream.
   5509     }
   5510 
   5511     // Warn if we altered one of the arguments
   5512     if (arg0 != argAggregate->getSequence()[0] || arg1 != argAggregate->getSequence()[1])
   5513         warn(loc, "mul() matrix size mismatch", "", "");
   5514 
   5515     // Put arguments back.  (They might be unchanged, in which case this is harmless).
   5516     argAggregate->getSequence()[0] = arg0;
   5517     argAggregate->getSequence()[1] = arg1;
   5518 
   5519     call[0].type = &arg0->getWritableType();
   5520     call[1].type = &arg1->getWritableType();
   5521 }
   5522 
   5523 //
   5524 // Add any needed implicit conversions for function-call arguments to input parameters.
   5525 //
   5526 void HlslParseContext::addInputArgumentConversions(const TFunction& function, TIntermTyped*& arguments)
   5527 {
   5528     TIntermAggregate* aggregate = arguments->getAsAggregate();
   5529 
   5530     // Replace a single argument with a single argument.
   5531     const auto setArg = [&](int paramNum, TIntermTyped* arg) {
   5532         if (function.getParamCount() == 1)
   5533             arguments = arg;
   5534         else {
   5535             if (aggregate == nullptr)
   5536                 arguments = arg;
   5537             else
   5538                 aggregate->getSequence()[paramNum] = arg;
   5539         }
   5540     };
   5541 
   5542     // Process each argument's conversion
   5543     for (int param = 0; param < function.getParamCount(); ++param) {
   5544         if (! function[param].type->getQualifier().isParamInput())
   5545             continue;
   5546 
   5547         // At this early point there is a slight ambiguity between whether an aggregate 'arguments'
   5548         // is the single argument itself or its children are the arguments.  Only one argument
   5549         // means take 'arguments' itself as the one argument.
   5550         TIntermTyped* arg = function.getParamCount() == 1
   5551                                    ? arguments->getAsTyped()
   5552                                    : (aggregate ?
   5553                                         aggregate->getSequence()[param]->getAsTyped() :
   5554                                         arguments->getAsTyped());
   5555         if (*function[param].type != arg->getType()) {
   5556             // In-qualified arguments just need an extra node added above the argument to
   5557             // convert to the correct type.
   5558             TIntermTyped* convArg = intermediate.addConversion(EOpFunctionCall, *function[param].type, arg);
   5559             if (convArg != nullptr)
   5560                 convArg = intermediate.addUniShapeConversion(EOpFunctionCall, *function[param].type, convArg);
   5561             if (convArg != nullptr)
   5562                 setArg(param, convArg);
   5563             else
   5564                 error(arg->getLoc(), "cannot convert input argument, argument", "", "%d", param);
   5565         } else {
   5566             if (wasFlattened(arg)) {
   5567                 // If both formal and calling arg are to be flattened, leave that to argument
   5568                 // expansion, not conversion.
   5569                 if (!shouldFlatten(*function[param].type, function[param].type->getQualifier().storage, true)) {
   5570                     // Will make a two-level subtree.
   5571                     // The deepest will copy member-by-member to build the structure to pass.
   5572                     // The level above that will be a two-operand EOpComma sequence that follows the copy by the
   5573                     // object itself.
   5574                     TVariable* internalAggregate = makeInternalVariable("aggShadow", *function[param].type);
   5575                     internalAggregate->getWritableType().getQualifier().makeTemporary();
   5576                     TIntermSymbol* internalSymbolNode = new TIntermSymbol(internalAggregate->getUniqueId(),
   5577                                                                           internalAggregate->getName(),
   5578                                                                           internalAggregate->getType());
   5579                     internalSymbolNode->setLoc(arg->getLoc());
   5580                     // This makes the deepest level, the member-wise copy
   5581                     TIntermAggregate* assignAgg = handleAssign(arg->getLoc(), EOpAssign,
   5582                                                                internalSymbolNode, arg)->getAsAggregate();
   5583 
   5584                     // Now, pair that with the resulting aggregate.
   5585                     assignAgg = intermediate.growAggregate(assignAgg, internalSymbolNode, arg->getLoc());
   5586                     assignAgg->setOperator(EOpComma);
   5587                     assignAgg->setType(internalAggregate->getType());
   5588                     setArg(param, assignAgg);
   5589                 }
   5590             }
   5591         }
   5592     }
   5593 }
   5594 
   5595 //
   5596 // Add any needed implicit expansion of calling arguments from what the shader listed to what's
   5597 // internally needed for the AST (given the constraints downstream).
   5598 //
   5599 void HlslParseContext::expandArguments(const TSourceLoc& loc, const TFunction& function, TIntermTyped*& arguments)
   5600 {
   5601     TIntermAggregate* aggregate = arguments->getAsAggregate();
   5602     int functionParamNumberOffset = 0;
   5603 
   5604     // Replace a single argument with a single argument.
   5605     const auto setArg = [&](int paramNum, TIntermTyped* arg) {
   5606         if (function.getParamCount() + functionParamNumberOffset == 1)
   5607             arguments = arg;
   5608         else {
   5609             if (aggregate == nullptr)
   5610                 arguments = arg;
   5611             else
   5612                 aggregate->getSequence()[paramNum] = arg;
   5613         }
   5614     };
   5615 
   5616     // Replace a single argument with a list of arguments
   5617     const auto setArgList = [&](int paramNum, const TVector<TIntermTyped*>& args) {
   5618         if (args.size() == 1)
   5619             setArg(paramNum, args.front());
   5620         else if (args.size() > 1) {
   5621             if (function.getParamCount() + functionParamNumberOffset == 1) {
   5622                 arguments = intermediate.makeAggregate(args.front());
   5623                 std::for_each(args.begin() + 1, args.end(),
   5624                     [&](TIntermTyped* arg) {
   5625                         arguments = intermediate.growAggregate(arguments, arg);
   5626                     });
   5627             } else {
   5628                 auto it = aggregate->getSequence().erase(aggregate->getSequence().begin() + paramNum);
   5629                 aggregate->getSequence().insert(it, args.begin(), args.end());
   5630             }
   5631             functionParamNumberOffset += (int)(args.size() - 1);
   5632         }
   5633     };
   5634 
   5635     // Process each argument's conversion
   5636     for (int param = 0; param < function.getParamCount(); ++param) {
   5637         // At this early point there is a slight ambiguity between whether an aggregate 'arguments'
   5638         // is the single argument itself or its children are the arguments.  Only one argument
   5639         // means take 'arguments' itself as the one argument.
   5640         TIntermTyped* arg = function.getParamCount() == 1
   5641                                    ? arguments->getAsTyped()
   5642                                    : (aggregate ?
   5643                                         aggregate->getSequence()[param + functionParamNumberOffset]->getAsTyped() :
   5644                                         arguments->getAsTyped());
   5645 
   5646         if (wasFlattened(arg) && shouldFlatten(*function[param].type, function[param].type->getQualifier().storage, true)) {
   5647             // Need to pass the structure members instead of the structure.
   5648             TVector<TIntermTyped*> memberArgs;
   5649             for (int memb = 0; memb < (int)arg->getType().getStruct()->size(); ++memb)
   5650                 memberArgs.push_back(flattenAccess(arg, memb));
   5651             setArgList(param + functionParamNumberOffset, memberArgs);
   5652         }
   5653     }
   5654 
   5655     // TODO: if we need both hidden counter args (below) and struct expansion (above)
   5656     // the two algorithms need to be merged: Each assumes the list starts out 1:1 between
   5657     // parameters and arguments.
   5658 
   5659     // If any argument is a pass-by-reference struct buffer with an associated counter
   5660     // buffer, we have to add another hidden parameter for that counter.
   5661     if (aggregate)
   5662         addStructBuffArguments(loc, aggregate);
   5663 }
   5664 
   5665 //
   5666 // Add any needed implicit output conversions for function-call arguments.  This
   5667 // can require a new tree topology, complicated further by whether the function
   5668 // has a return value.
   5669 //
   5670 // Returns a node of a subtree that evaluates to the return value of the function.
   5671 //
   5672 TIntermTyped* HlslParseContext::addOutputArgumentConversions(const TFunction& function, TIntermOperator& intermNode)
   5673 {
   5674     assert (intermNode.getAsAggregate() != nullptr || intermNode.getAsUnaryNode() != nullptr);
   5675 
   5676     const TSourceLoc& loc = intermNode.getLoc();
   5677 
   5678     TIntermSequence argSequence; // temp sequence for unary node args
   5679 
   5680     if (intermNode.getAsUnaryNode())
   5681         argSequence.push_back(intermNode.getAsUnaryNode()->getOperand());
   5682 
   5683     TIntermSequence& arguments = argSequence.empty() ? intermNode.getAsAggregate()->getSequence() : argSequence;
   5684 
   5685     const auto needsConversion = [&](int argNum) {
   5686         return function[argNum].type->getQualifier().isParamOutput() &&
   5687                (*function[argNum].type != arguments[argNum]->getAsTyped()->getType() ||
   5688                 shouldConvertLValue(arguments[argNum]) ||
   5689                 wasFlattened(arguments[argNum]->getAsTyped()));
   5690     };
   5691 
   5692     // Will there be any output conversions?
   5693     bool outputConversions = false;
   5694     for (int i = 0; i < function.getParamCount(); ++i) {
   5695         if (needsConversion(i)) {
   5696             outputConversions = true;
   5697             break;
   5698         }
   5699     }
   5700 
   5701     if (! outputConversions)
   5702         return &intermNode;
   5703 
   5704     // Setup for the new tree, if needed:
   5705     //
   5706     // Output conversions need a different tree topology.
   5707     // Out-qualified arguments need a temporary of the correct type, with the call
   5708     // followed by an assignment of the temporary to the original argument:
   5709     //     void: function(arg, ...)  ->        (          function(tempArg, ...), arg = tempArg, ...)
   5710     //     ret = function(arg, ...)  ->  ret = (tempRet = function(tempArg, ...), arg = tempArg, ..., tempRet)
   5711     // Where the "tempArg" type needs no conversion as an argument, but will convert on assignment.
   5712     TIntermTyped* conversionTree = nullptr;
   5713     TVariable* tempRet = nullptr;
   5714     if (intermNode.getBasicType() != EbtVoid) {
   5715         // do the "tempRet = function(...), " bit from above
   5716         tempRet = makeInternalVariable("tempReturn", intermNode.getType());
   5717         TIntermSymbol* tempRetNode = intermediate.addSymbol(*tempRet, loc);
   5718         conversionTree = intermediate.addAssign(EOpAssign, tempRetNode, &intermNode, loc);
   5719     } else
   5720         conversionTree = &intermNode;
   5721 
   5722     conversionTree = intermediate.makeAggregate(conversionTree);
   5723 
   5724     // Process each argument's conversion
   5725     for (int i = 0; i < function.getParamCount(); ++i) {
   5726         if (needsConversion(i)) {
   5727             // Out-qualified arguments needing conversion need to use the topology setup above.
   5728             // Do the " ...(tempArg, ...), arg = tempArg" bit from above.
   5729 
   5730             // Make a temporary for what the function expects the argument to look like.
   5731             TVariable* tempArg = makeInternalVariable("tempArg", *function[i].type);
   5732             tempArg->getWritableType().getQualifier().makeTemporary();
   5733             TIntermSymbol* tempArgNode = intermediate.addSymbol(*tempArg, loc);
   5734 
   5735             // This makes the deepest level, the member-wise copy
   5736             TIntermTyped* tempAssign = handleAssign(arguments[i]->getLoc(), EOpAssign, arguments[i]->getAsTyped(),
   5737                                                     tempArgNode);
   5738             tempAssign = handleLvalue(arguments[i]->getLoc(), "assign", tempAssign);
   5739             conversionTree = intermediate.growAggregate(conversionTree, tempAssign, arguments[i]->getLoc());
   5740 
   5741             // replace the argument with another node for the same tempArg variable
   5742             arguments[i] = intermediate.addSymbol(*tempArg, loc);
   5743         }
   5744     }
   5745 
   5746     // Finalize the tree topology (see bigger comment above).
   5747     if (tempRet) {
   5748         // do the "..., tempRet" bit from above
   5749         TIntermSymbol* tempRetNode = intermediate.addSymbol(*tempRet, loc);
   5750         conversionTree = intermediate.growAggregate(conversionTree, tempRetNode, loc);
   5751     }
   5752 
   5753     conversionTree = intermediate.setAggregateOperator(conversionTree, EOpComma, intermNode.getType(), loc);
   5754 
   5755     return conversionTree;
   5756 }
   5757 
   5758 //
   5759 // Add any needed "hidden" counter buffer arguments for function calls.
   5760 //
   5761 // Modifies the 'aggregate' argument if needed.  Otherwise, is no-op.
   5762 //
   5763 void HlslParseContext::addStructBuffArguments(const TSourceLoc& loc, TIntermAggregate*& aggregate)
   5764 {
   5765     // See if there are any SB types with counters.
   5766     const bool hasStructBuffArg =
   5767         std::any_of(aggregate->getSequence().begin(),
   5768                     aggregate->getSequence().end(),
   5769                     [this](const TIntermNode* node) {
   5770                         return (node->getAsTyped() != nullptr) && hasStructBuffCounter(node->getAsTyped()->getType());
   5771                     });
   5772 
   5773     // Nothing to do, if we didn't find one.
   5774     if (! hasStructBuffArg)
   5775         return;
   5776 
   5777     TIntermSequence argsWithCounterBuffers;
   5778 
   5779     for (int param = 0; param < int(aggregate->getSequence().size()); ++param) {
   5780         argsWithCounterBuffers.push_back(aggregate->getSequence()[param]);
   5781 
   5782         if (hasStructBuffCounter(aggregate->getSequence()[param]->getAsTyped()->getType())) {
   5783             const TIntermSymbol* blockSym = aggregate->getSequence()[param]->getAsSymbolNode();
   5784             if (blockSym != nullptr) {
   5785                 TType counterType;
   5786                 counterBufferType(loc, counterType);
   5787 
   5788                 const TString counterBlockName(intermediate.addCounterBufferName(blockSym->getName()));
   5789 
   5790                 TVariable* variable = makeInternalVariable(counterBlockName, counterType);
   5791 
   5792                 // Mark this buffer's counter block as being in use
   5793                 structBufferCounter[counterBlockName] = true;
   5794 
   5795                 TIntermSymbol* sym = intermediate.addSymbol(*variable, loc);
   5796                 argsWithCounterBuffers.push_back(sym);
   5797             }
   5798         }
   5799     }
   5800 
   5801     // Swap with the temp list we've built up.
   5802     aggregate->getSequence().swap(argsWithCounterBuffers);
   5803 }
   5804 
   5805 
   5806 //
   5807 // Do additional checking of built-in function calls that is not caught
   5808 // by normal semantic checks on argument type, extension tagging, etc.
   5809 //
   5810 // Assumes there has been a semantically correct match to a built-in function prototype.
   5811 //
   5812 void HlslParseContext::builtInOpCheck(const TSourceLoc& loc, const TFunction& fnCandidate, TIntermOperator& callNode)
   5813 {
   5814     // Set up convenience accessors to the argument(s).  There is almost always
   5815     // multiple arguments for the cases below, but when there might be one,
   5816     // check the unaryArg first.
   5817     const TIntermSequence* argp = nullptr;   // confusing to use [] syntax on a pointer, so this is to help get a reference
   5818     const TIntermTyped* unaryArg = nullptr;
   5819     const TIntermTyped* arg0 = nullptr;
   5820     if (callNode.getAsAggregate()) {
   5821         argp = &callNode.getAsAggregate()->getSequence();
   5822         if (argp->size() > 0)
   5823             arg0 = (*argp)[0]->getAsTyped();
   5824     } else {
   5825         assert(callNode.getAsUnaryNode());
   5826         unaryArg = callNode.getAsUnaryNode()->getOperand();
   5827         arg0 = unaryArg;
   5828     }
   5829     const TIntermSequence& aggArgs = *argp;  // only valid when unaryArg is nullptr
   5830 
   5831     switch (callNode.getOp()) {
   5832     case EOpTextureGather:
   5833     case EOpTextureGatherOffset:
   5834     case EOpTextureGatherOffsets:
   5835     {
   5836         // Figure out which variants are allowed by what extensions,
   5837         // and what arguments must be constant for which situations.
   5838 
   5839         TString featureString = fnCandidate.getName() + "(...)";
   5840         const char* feature = featureString.c_str();
   5841         int compArg = -1;  // track which argument, if any, is the constant component argument
   5842         switch (callNode.getOp()) {
   5843         case EOpTextureGather:
   5844             // More than two arguments needs gpu_shader5, and rectangular or shadow needs gpu_shader5,
   5845             // otherwise, need GL_ARB_texture_gather.
   5846             if (fnCandidate.getParamCount() > 2 || fnCandidate[0].type->getSampler().dim == EsdRect ||
   5847                 fnCandidate[0].type->getSampler().shadow) {
   5848                 if (! fnCandidate[0].type->getSampler().shadow)
   5849                     compArg = 2;
   5850             }
   5851             break;
   5852         case EOpTextureGatherOffset:
   5853             // GL_ARB_texture_gather is good enough for 2D non-shadow textures with no component argument
   5854             if (! fnCandidate[0].type->getSampler().shadow)
   5855                 compArg = 3;
   5856             break;
   5857         case EOpTextureGatherOffsets:
   5858             if (! fnCandidate[0].type->getSampler().shadow)
   5859                 compArg = 3;
   5860             break;
   5861         default:
   5862             break;
   5863         }
   5864 
   5865         if (compArg > 0 && compArg < fnCandidate.getParamCount()) {
   5866             if (aggArgs[compArg]->getAsConstantUnion()) {
   5867                 int value = aggArgs[compArg]->getAsConstantUnion()->getConstArray()[0].getIConst();
   5868                 if (value < 0 || value > 3)
   5869                     error(loc, "must be 0, 1, 2, or 3:", feature, "component argument");
   5870             } else
   5871                 error(loc, "must be a compile-time constant:", feature, "component argument");
   5872         }
   5873 
   5874         break;
   5875     }
   5876 
   5877     case EOpTextureOffset:
   5878     case EOpTextureFetchOffset:
   5879     case EOpTextureProjOffset:
   5880     case EOpTextureLodOffset:
   5881     case EOpTextureProjLodOffset:
   5882     case EOpTextureGradOffset:
   5883     case EOpTextureProjGradOffset:
   5884     {
   5885         // Handle texture-offset limits checking
   5886         // Pick which argument has to hold constant offsets
   5887         int arg = -1;
   5888         switch (callNode.getOp()) {
   5889         case EOpTextureOffset:          arg = 2;  break;
   5890         case EOpTextureFetchOffset:     arg = (arg0->getType().getSampler().dim != EsdRect) ? 3 : 2; break;
   5891         case EOpTextureProjOffset:      arg = 2;  break;
   5892         case EOpTextureLodOffset:       arg = 3;  break;
   5893         case EOpTextureProjLodOffset:   arg = 3;  break;
   5894         case EOpTextureGradOffset:      arg = 4;  break;
   5895         case EOpTextureProjGradOffset:  arg = 4;  break;
   5896         default:
   5897             assert(0);
   5898             break;
   5899         }
   5900 
   5901         if (arg > 0) {
   5902             if (aggArgs[arg]->getAsConstantUnion() == nullptr)
   5903                 error(loc, "argument must be compile-time constant", "texel offset", "");
   5904             else {
   5905                 const TType& type = aggArgs[arg]->getAsTyped()->getType();
   5906                 for (int c = 0; c < type.getVectorSize(); ++c) {
   5907                     int offset = aggArgs[arg]->getAsConstantUnion()->getConstArray()[c].getIConst();
   5908                     if (offset > resources.maxProgramTexelOffset || offset < resources.minProgramTexelOffset)
   5909                         error(loc, "value is out of range:", "texel offset",
   5910                               "[gl_MinProgramTexelOffset, gl_MaxProgramTexelOffset]");
   5911                 }
   5912             }
   5913         }
   5914 
   5915         break;
   5916     }
   5917 
   5918     case EOpTextureQuerySamples:
   5919     case EOpImageQuerySamples:
   5920         break;
   5921 
   5922     case EOpImageAtomicAdd:
   5923     case EOpImageAtomicMin:
   5924     case EOpImageAtomicMax:
   5925     case EOpImageAtomicAnd:
   5926     case EOpImageAtomicOr:
   5927     case EOpImageAtomicXor:
   5928     case EOpImageAtomicExchange:
   5929     case EOpImageAtomicCompSwap:
   5930         break;
   5931 
   5932     case EOpInterpolateAtCentroid:
   5933     case EOpInterpolateAtSample:
   5934     case EOpInterpolateAtOffset:
   5935         // Make sure the first argument is an interpolant, or an array element of an interpolant
   5936         if (arg0->getType().getQualifier().storage != EvqVaryingIn) {
   5937             // It might still be an array element.
   5938             //
   5939             // We could check more, but the semantics of the first argument are already met; the
   5940             // only way to turn an array into a float/vec* is array dereference and swizzle.
   5941             //
   5942             // ES and desktop 4.3 and earlier:  swizzles may not be used
   5943             // desktop 4.4 and later: swizzles may be used
   5944             const TIntermTyped* base = TIntermediate::findLValueBase(arg0, true);
   5945             if (base == nullptr || base->getType().getQualifier().storage != EvqVaryingIn)
   5946                 error(loc, "first argument must be an interpolant, or interpolant-array element",
   5947                       fnCandidate.getName().c_str(), "");
   5948         }
   5949         break;
   5950 
   5951     default:
   5952         break;
   5953     }
   5954 }
   5955 
   5956 //
   5957 // Handle seeing something in a grammar production that can be done by calling
   5958 // a constructor.
   5959 //
   5960 // The constructor still must be "handled" by handleFunctionCall(), which will
   5961 // then call handleConstructor().
   5962 //
   5963 TFunction* HlslParseContext::makeConstructorCall(const TSourceLoc& loc, const TType& type)
   5964 {
   5965     TOperator op = intermediate.mapTypeToConstructorOp(type);
   5966 
   5967     if (op == EOpNull) {
   5968         error(loc, "cannot construct this type", type.getBasicString(), "");
   5969         return nullptr;
   5970     }
   5971 
   5972     TString empty("");
   5973 
   5974     return new TFunction(&empty, type, op);
   5975 }
   5976 
   5977 //
   5978 // Handle seeing a "COLON semantic" at the end of a type declaration,
   5979 // by updating the type according to the semantic.
   5980 //
   5981 void HlslParseContext::handleSemantic(TSourceLoc loc, TQualifier& qualifier, TBuiltInVariable builtIn,
   5982                                       const TString& upperCase)
   5983 {
   5984     // Parse and return semantic number.  If limit is 0, it will be ignored.  Otherwise, if the parsed
   5985     // semantic number is >= limit, errorMsg is issued and 0 is returned.
   5986     // TODO: it would be nicer if limit and errorMsg had default parameters, but some compilers don't yet
   5987     // accept those in lambda functions.
   5988     const auto getSemanticNumber = [this, loc](const TString& semantic, unsigned int limit, const char* errorMsg) -> unsigned int {
   5989         size_t pos = semantic.find_last_not_of("0123456789");
   5990         if (pos == std::string::npos)
   5991             return 0u;
   5992 
   5993         unsigned int semanticNum = (unsigned int)atoi(semantic.c_str() + pos + 1);
   5994 
   5995         if (limit != 0 && semanticNum >= limit) {
   5996             error(loc, errorMsg, semantic.c_str(), "");
   5997             return 0u;
   5998         }
   5999 
   6000         return semanticNum;
   6001     };
   6002 
   6003     switch(builtIn) {
   6004     case EbvNone:
   6005         // Get location numbers from fragment outputs, instead of
   6006         // auto-assigning them.
   6007         if (language == EShLangFragment && upperCase.compare(0, 9, "SV_TARGET") == 0) {
   6008             qualifier.layoutLocation = getSemanticNumber(upperCase, 0, nullptr);
   6009             nextOutLocation = std::max(nextOutLocation, qualifier.layoutLocation + 1u);
   6010         } else if (upperCase.compare(0, 15, "SV_CLIPDISTANCE") == 0) {
   6011             builtIn = EbvClipDistance;
   6012             qualifier.layoutLocation = getSemanticNumber(upperCase, maxClipCullRegs, "invalid clip semantic");
   6013         } else if (upperCase.compare(0, 15, "SV_CULLDISTANCE") == 0) {
   6014             builtIn = EbvCullDistance;
   6015             qualifier.layoutLocation = getSemanticNumber(upperCase, maxClipCullRegs, "invalid cull semantic");
   6016         }
   6017         break;
   6018     case EbvPosition:
   6019         // adjust for stage in/out
   6020         if (language == EShLangFragment)
   6021             builtIn = EbvFragCoord;
   6022         break;
   6023     case EbvFragStencilRef:
   6024         error(loc, "unimplemented; need ARB_shader_stencil_export", "SV_STENCILREF", "");
   6025         break;
   6026     case EbvTessLevelInner:
   6027     case EbvTessLevelOuter:
   6028         qualifier.patch = true;
   6029         break;
   6030     default:
   6031         break;
   6032     }
   6033 
   6034     if (qualifier.builtIn == EbvNone)
   6035         qualifier.builtIn = builtIn;
   6036     qualifier.semanticName = intermediate.addSemanticName(upperCase);
   6037 }
   6038 
   6039 //
   6040 // Handle seeing something like "PACKOFFSET LEFT_PAREN c[Subcomponent][.component] RIGHT_PAREN"
   6041 //
   6042 // 'location' has the "c[Subcomponent]" part.
   6043 // 'component' points to the "component" part, or nullptr if not present.
   6044 //
   6045 void HlslParseContext::handlePackOffset(const TSourceLoc& loc, TQualifier& qualifier, const glslang::TString& location,
   6046                                         const glslang::TString* component)
   6047 {
   6048     if (location.size() == 0 || location[0] != 'c') {
   6049         error(loc, "expected 'c'", "packoffset", "");
   6050         return;
   6051     }
   6052     if (location.size() == 1)
   6053         return;
   6054     if (! isdigit(location[1])) {
   6055         error(loc, "expected number after 'c'", "packoffset", "");
   6056         return;
   6057     }
   6058 
   6059     qualifier.layoutOffset = 16 * atoi(location.substr(1, location.size()).c_str());
   6060     if (component != nullptr) {
   6061         int componentOffset = 0;
   6062         switch ((*component)[0]) {
   6063         case 'x': componentOffset =  0; break;
   6064         case 'y': componentOffset =  4; break;
   6065         case 'z': componentOffset =  8; break;
   6066         case 'w': componentOffset = 12; break;
   6067         default:
   6068             componentOffset = -1;
   6069             break;
   6070         }
   6071         if (componentOffset < 0 || component->size() > 1) {
   6072             error(loc, "expected {x, y, z, w} for component", "packoffset", "");
   6073             return;
   6074         }
   6075         qualifier.layoutOffset += componentOffset;
   6076     }
   6077 }
   6078 
   6079 //
   6080 // Handle seeing something like "REGISTER LEFT_PAREN [shader_profile,] Type# RIGHT_PAREN"
   6081 //
   6082 // 'profile' points to the shader_profile part, or nullptr if not present.
   6083 // 'desc' is the type# part.
   6084 //
   6085 void HlslParseContext::handleRegister(const TSourceLoc& loc, TQualifier& qualifier, const glslang::TString* profile,
   6086                                       const glslang::TString& desc, int subComponent, const glslang::TString* spaceDesc)
   6087 {
   6088     if (profile != nullptr)
   6089         warn(loc, "ignoring shader_profile", "register", "");
   6090 
   6091     if (desc.size() < 1) {
   6092         error(loc, "expected register type", "register", "");
   6093         return;
   6094     }
   6095 
   6096     int regNumber = 0;
   6097     if (desc.size() > 1) {
   6098         if (isdigit(desc[1]))
   6099             regNumber = atoi(desc.substr(1, desc.size()).c_str());
   6100         else {
   6101             error(loc, "expected register number after register type", "register", "");
   6102             return;
   6103         }
   6104     }
   6105 
   6106     // more information about register types see
   6107     // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx-graphics-hlsl-variable-register
   6108     const std::vector<std::string>& resourceInfo = intermediate.getResourceSetBinding();
   6109     switch (std::tolower(desc[0])) {
   6110     case 'c':
   6111         // c register is the register slot in the global const buffer
   6112         // each slot is a vector of 4 32 bit components
   6113         qualifier.layoutOffset = regNumber * 4 * 4;
   6114         break;
   6115         // const buffer register slot
   6116     case 'b':
   6117         // textrues and structured buffers
   6118     case 't':
   6119         // samplers
   6120     case 's':
   6121         // uav resources
   6122     case 'u':
   6123         // if nothing else has set the binding, do so now
   6124         // (other mechanisms override this one)
   6125         if (!qualifier.hasBinding())
   6126             qualifier.layoutBinding = regNumber + subComponent;
   6127 
   6128         // This handles per-register layout sets numbers.  For the global mode which sets
   6129         // every symbol to the same value, see setLinkageLayoutSets().
   6130         if ((resourceInfo.size() % 3) == 0) {
   6131             // Apply per-symbol resource set and binding.
   6132             for (auto it = resourceInfo.cbegin(); it != resourceInfo.cend(); it = it + 3) {
   6133                 if (strcmp(desc.c_str(), it[0].c_str()) == 0) {
   6134                     qualifier.layoutSet = atoi(it[1].c_str());
   6135                     qualifier.layoutBinding = atoi(it[2].c_str()) + subComponent;
   6136                     break;
   6137                 }
   6138             }
   6139         }
   6140         break;
   6141     default:
   6142         warn(loc, "ignoring unrecognized register type", "register", "%c", desc[0]);
   6143         break;
   6144     }
   6145 
   6146     // space
   6147     unsigned int setNumber;
   6148     const auto crackSpace = [&]() -> bool {
   6149         const int spaceLen = 5;
   6150         if (spaceDesc->size() < spaceLen + 1)
   6151             return false;
   6152         if (spaceDesc->compare(0, spaceLen, "space") != 0)
   6153             return false;
   6154         if (! isdigit((*spaceDesc)[spaceLen]))
   6155             return false;
   6156         setNumber = atoi(spaceDesc->substr(spaceLen, spaceDesc->size()).c_str());
   6157         return true;
   6158     };
   6159 
   6160     // if nothing else has set the set, do so now
   6161     // (other mechanisms override this one)
   6162     if (spaceDesc && !qualifier.hasSet()) {
   6163         if (! crackSpace()) {
   6164             error(loc, "expected spaceN", "register", "");
   6165             return;
   6166         }
   6167         qualifier.layoutSet = setNumber;
   6168     }
   6169 }
   6170 
   6171 // Convert to a scalar boolean, or if not allowed by HLSL semantics,
   6172 // report an error and return nullptr.
   6173 TIntermTyped* HlslParseContext::convertConditionalExpression(const TSourceLoc& loc, TIntermTyped* condition,
   6174                                                              bool mustBeScalar)
   6175 {
   6176     if (mustBeScalar && !condition->getType().isScalarOrVec1()) {
   6177         error(loc, "requires a scalar", "conditional expression", "");
   6178         return nullptr;
   6179     }
   6180 
   6181     return intermediate.addConversion(EOpConstructBool, TType(EbtBool, EvqTemporary, condition->getVectorSize()),
   6182                                       condition);
   6183 }
   6184 
   6185 //
   6186 // Same error message for all places assignments don't work.
   6187 //
   6188 void HlslParseContext::assignError(const TSourceLoc& loc, const char* op, TString left, TString right)
   6189 {
   6190     error(loc, "", op, "cannot convert from '%s' to '%s'",
   6191         right.c_str(), left.c_str());
   6192 }
   6193 
   6194 //
   6195 // Same error message for all places unary operations don't work.
   6196 //
   6197 void HlslParseContext::unaryOpError(const TSourceLoc& loc, const char* op, TString operand)
   6198 {
   6199     error(loc, " wrong operand type", op,
   6200         "no operation '%s' exists that takes an operand of type %s (or there is no acceptable conversion)",
   6201         op, operand.c_str());
   6202 }
   6203 
   6204 //
   6205 // Same error message for all binary operations don't work.
   6206 //
   6207 void HlslParseContext::binaryOpError(const TSourceLoc& loc, const char* op, TString left, TString right)
   6208 {
   6209     error(loc, " wrong operand types:", op,
   6210         "no operation '%s' exists that takes a left-hand operand of type '%s' and "
   6211         "a right operand of type '%s' (or there is no acceptable conversion)",
   6212         op, left.c_str(), right.c_str());
   6213 }
   6214 
   6215 //
   6216 // A basic type of EbtVoid is a key that the name string was seen in the source, but
   6217 // it was not found as a variable in the symbol table.  If so, give the error
   6218 // message and insert a dummy variable in the symbol table to prevent future errors.
   6219 //
   6220 void HlslParseContext::variableCheck(TIntermTyped*& nodePtr)
   6221 {
   6222     TIntermSymbol* symbol = nodePtr->getAsSymbolNode();
   6223     if (! symbol)
   6224         return;
   6225 
   6226     if (symbol->getType().getBasicType() == EbtVoid) {
   6227         error(symbol->getLoc(), "undeclared identifier", symbol->getName().c_str(), "");
   6228 
   6229         // Add to symbol table to prevent future error messages on the same name
   6230         if (symbol->getName().size() > 0) {
   6231             TVariable* fakeVariable = new TVariable(&symbol->getName(), TType(EbtFloat));
   6232             symbolTable.insert(*fakeVariable);
   6233 
   6234             // substitute a symbol node for this new variable
   6235             nodePtr = intermediate.addSymbol(*fakeVariable, symbol->getLoc());
   6236         }
   6237     }
   6238 }
   6239 
   6240 //
   6241 // Both test, and if necessary spit out an error, to see if the node is really
   6242 // a constant.
   6243 //
   6244 void HlslParseContext::constantValueCheck(TIntermTyped* node, const char* token)
   6245 {
   6246     if (node->getQualifier().storage != EvqConst)
   6247         error(node->getLoc(), "constant expression required", token, "");
   6248 }
   6249 
   6250 //
   6251 // Both test, and if necessary spit out an error, to see if the node is really
   6252 // an integer.
   6253 //
   6254 void HlslParseContext::integerCheck(const TIntermTyped* node, const char* token)
   6255 {
   6256     if ((node->getBasicType() == EbtInt || node->getBasicType() == EbtUint) && node->isScalar())
   6257         return;
   6258 
   6259     error(node->getLoc(), "scalar integer expression required", token, "");
   6260 }
   6261 
   6262 //
   6263 // Both test, and if necessary spit out an error, to see if we are currently
   6264 // globally scoped.
   6265 //
   6266 void HlslParseContext::globalCheck(const TSourceLoc& loc, const char* token)
   6267 {
   6268     if (! symbolTable.atGlobalLevel())
   6269         error(loc, "not allowed in nested scope", token, "");
   6270 }
   6271 
   6272 bool HlslParseContext::builtInName(const TString& /*identifier*/)
   6273 {
   6274     return false;
   6275 }
   6276 
   6277 //
   6278 // Make sure there is enough data and not too many arguments provided to the
   6279 // constructor to build something of the type of the constructor.  Also returns
   6280 // the type of the constructor.
   6281 //
   6282 // Returns true if there was an error in construction.
   6283 //
   6284 bool HlslParseContext::constructorError(const TSourceLoc& loc, TIntermNode* node, TFunction& function,
   6285                                         TOperator op, TType& type)
   6286 {
   6287     type.shallowCopy(function.getType());
   6288 
   6289     bool constructingMatrix = false;
   6290     switch (op) {
   6291     case EOpConstructTextureSampler:
   6292         error(loc, "unhandled texture constructor", "constructor", "");
   6293         return true;
   6294     case EOpConstructMat2x2:
   6295     case EOpConstructMat2x3:
   6296     case EOpConstructMat2x4:
   6297     case EOpConstructMat3x2:
   6298     case EOpConstructMat3x3:
   6299     case EOpConstructMat3x4:
   6300     case EOpConstructMat4x2:
   6301     case EOpConstructMat4x3:
   6302     case EOpConstructMat4x4:
   6303     case EOpConstructDMat2x2:
   6304     case EOpConstructDMat2x3:
   6305     case EOpConstructDMat2x4:
   6306     case EOpConstructDMat3x2:
   6307     case EOpConstructDMat3x3:
   6308     case EOpConstructDMat3x4:
   6309     case EOpConstructDMat4x2:
   6310     case EOpConstructDMat4x3:
   6311     case EOpConstructDMat4x4:
   6312     case EOpConstructIMat2x2:
   6313     case EOpConstructIMat2x3:
   6314     case EOpConstructIMat2x4:
   6315     case EOpConstructIMat3x2:
   6316     case EOpConstructIMat3x3:
   6317     case EOpConstructIMat3x4:
   6318     case EOpConstructIMat4x2:
   6319     case EOpConstructIMat4x3:
   6320     case EOpConstructIMat4x4:
   6321     case EOpConstructUMat2x2:
   6322     case EOpConstructUMat2x3:
   6323     case EOpConstructUMat2x4:
   6324     case EOpConstructUMat3x2:
   6325     case EOpConstructUMat3x3:
   6326     case EOpConstructUMat3x4:
   6327     case EOpConstructUMat4x2:
   6328     case EOpConstructUMat4x3:
   6329     case EOpConstructUMat4x4:
   6330     case EOpConstructBMat2x2:
   6331     case EOpConstructBMat2x3:
   6332     case EOpConstructBMat2x4:
   6333     case EOpConstructBMat3x2:
   6334     case EOpConstructBMat3x3:
   6335     case EOpConstructBMat3x4:
   6336     case EOpConstructBMat4x2:
   6337     case EOpConstructBMat4x3:
   6338     case EOpConstructBMat4x4:
   6339         constructingMatrix = true;
   6340         break;
   6341     default:
   6342         break;
   6343     }
   6344 
   6345     //
   6346     // Walk the arguments for first-pass checks and collection of information.
   6347     //
   6348 
   6349     int size = 0;
   6350     bool constType = true;
   6351     bool full = false;
   6352     bool overFull = false;
   6353     bool matrixInMatrix = false;
   6354     bool arrayArg = false;
   6355     for (int arg = 0; arg < function.getParamCount(); ++arg) {
   6356         if (function[arg].type->isArray()) {
   6357             if (function[arg].type->isUnsizedArray()) {
   6358                 // Can't construct from an unsized array.
   6359                 error(loc, "array argument must be sized", "constructor", "");
   6360                 return true;
   6361             }
   6362             arrayArg = true;
   6363         }
   6364         if (constructingMatrix && function[arg].type->isMatrix())
   6365             matrixInMatrix = true;
   6366 
   6367         // 'full' will go to true when enough args have been seen.  If we loop
   6368         // again, there is an extra argument.
   6369         if (full) {
   6370             // For vectors and matrices, it's okay to have too many components
   6371             // available, but not okay to have unused arguments.
   6372             overFull = true;
   6373         }
   6374 
   6375         size += function[arg].type->computeNumComponents();
   6376         if (op != EOpConstructStruct && ! type.isArray() && size >= type.computeNumComponents())
   6377             full = true;
   6378 
   6379         if (function[arg].type->getQualifier().storage != EvqConst)
   6380             constType = false;
   6381     }
   6382 
   6383     if (constType)
   6384         type.getQualifier().storage = EvqConst;
   6385 
   6386     if (type.isArray()) {
   6387         if (function.getParamCount() == 0) {
   6388             error(loc, "array constructor must have at least one argument", "constructor", "");
   6389             return true;
   6390         }
   6391 
   6392         if (type.isUnsizedArray()) {
   6393             // auto adapt the constructor type to the number of arguments
   6394             type.changeOuterArraySize(function.getParamCount());
   6395         } else if (type.getOuterArraySize() != function.getParamCount() && type.computeNumComponents() > size) {
   6396             error(loc, "array constructor needs one argument per array element", "constructor", "");
   6397             return true;
   6398         }
   6399 
   6400         if (type.isArrayOfArrays()) {
   6401             // Types have to match, but we're still making the type.
   6402             // Finish making the type, and the comparison is done later
   6403             // when checking for conversion.
   6404             TArraySizes& arraySizes = *type.getArraySizes();
   6405 
   6406             // At least the dimensionalities have to match.
   6407             if (! function[0].type->isArray() ||
   6408                 arraySizes.getNumDims() != function[0].type->getArraySizes()->getNumDims() + 1) {
   6409                 error(loc, "array constructor argument not correct type to construct array element", "constructor", "");
   6410                 return true;
   6411             }
   6412 
   6413             if (arraySizes.isInnerUnsized()) {
   6414                 // "Arrays of arrays ..., and the size for any dimension is optional"
   6415                 // That means we need to adopt (from the first argument) the other array sizes into the type.
   6416                 for (int d = 1; d < arraySizes.getNumDims(); ++d) {
   6417                     if (arraySizes.getDimSize(d) == UnsizedArraySize) {
   6418                         arraySizes.setDimSize(d, function[0].type->getArraySizes()->getDimSize(d - 1));
   6419                     }
   6420                 }
   6421             }
   6422         }
   6423     }
   6424 
   6425     // Some array -> array type casts are okay
   6426     if (arrayArg && function.getParamCount() == 1 && op != EOpConstructStruct && type.isArray() &&
   6427         !type.isArrayOfArrays() && !function[0].type->isArrayOfArrays() &&
   6428         type.getVectorSize() >= 1 && function[0].type->getVectorSize() >= 1)
   6429         return false;
   6430 
   6431     if (arrayArg && op != EOpConstructStruct && ! type.isArrayOfArrays()) {
   6432         error(loc, "constructing non-array constituent from array argument", "constructor", "");
   6433         return true;
   6434     }
   6435 
   6436     if (matrixInMatrix && ! type.isArray()) {
   6437         return false;
   6438     }
   6439 
   6440     if (overFull) {
   6441         error(loc, "too many arguments", "constructor", "");
   6442         return true;
   6443     }
   6444 
   6445     if (op == EOpConstructStruct && ! type.isArray()) {
   6446         if (isScalarConstructor(node))
   6447             return false;
   6448 
   6449         // Self-type construction: e.g, we can construct a struct from a single identically typed object.
   6450         if (function.getParamCount() == 1 && type == *function[0].type)
   6451             return false;
   6452 
   6453         if ((int)type.getStruct()->size() != function.getParamCount()) {
   6454             error(loc, "Number of constructor parameters does not match the number of structure fields", "constructor", "");
   6455             return true;
   6456         }
   6457     }
   6458 
   6459     if ((op != EOpConstructStruct && size != 1 && size < type.computeNumComponents()) ||
   6460         (op == EOpConstructStruct && size < type.computeNumComponents())) {
   6461         error(loc, "not enough data provided for construction", "constructor", "");
   6462         return true;
   6463     }
   6464 
   6465     return false;
   6466 }
   6467 
   6468 // See if 'node', in the context of constructing aggregates, is a scalar argument
   6469 // to a constructor.
   6470 //
   6471 bool HlslParseContext::isScalarConstructor(const TIntermNode* node)
   6472 {
   6473     // Obviously, it must be a scalar, but an aggregate node might not be fully
   6474     // completed yet: holding a sequence of initializers under an aggregate
   6475     // would not yet be typed, so don't check it's type.  This corresponds to
   6476     // the aggregate operator also not being set yet. (An aggregate operation
   6477     // that legitimately yields a scalar will have a getOp() of that operator,
   6478     // not EOpNull.)
   6479 
   6480     return node->getAsTyped() != nullptr &&
   6481            node->getAsTyped()->isScalar() &&
   6482            (node->getAsAggregate() == nullptr || node->getAsAggregate()->getOp() != EOpNull);
   6483 }
   6484 
   6485 // Checks to see if a void variable has been declared and raise an error message for such a case
   6486 //
   6487 // returns true in case of an error
   6488 //
   6489 bool HlslParseContext::voidErrorCheck(const TSourceLoc& loc, const TString& identifier, const TBasicType basicType)
   6490 {
   6491     if (basicType == EbtVoid) {
   6492         error(loc, "illegal use of type 'void'", identifier.c_str(), "");
   6493         return true;
   6494     }
   6495 
   6496     return false;
   6497 }
   6498 
   6499 //
   6500 // Fix just a full qualifier (no variables or types yet, but qualifier is complete) at global level.
   6501 //
   6502 void HlslParseContext::globalQualifierFix(const TSourceLoc&, TQualifier& qualifier)
   6503 {
   6504     // move from parameter/unknown qualifiers to pipeline in/out qualifiers
   6505     switch (qualifier.storage) {
   6506     case EvqIn:
   6507         qualifier.storage = EvqVaryingIn;
   6508         break;
   6509     case EvqOut:
   6510         qualifier.storage = EvqVaryingOut;
   6511         break;
   6512     default:
   6513         break;
   6514     }
   6515 }
   6516 
   6517 //
   6518 // Merge characteristics of the 'src' qualifier into the 'dst'.
   6519 // If there is duplication, issue error messages, unless 'force'
   6520 // is specified, which means to just override default settings.
   6521 //
   6522 // Also, when force is false, it will be assumed that 'src' follows
   6523 // 'dst', for the purpose of error checking order for versions
   6524 // that require specific orderings of qualifiers.
   6525 //
   6526 void HlslParseContext::mergeQualifiers(TQualifier& dst, const TQualifier& src)
   6527 {
   6528     // Storage qualification
   6529     if (dst.storage == EvqTemporary || dst.storage == EvqGlobal)
   6530         dst.storage = src.storage;
   6531     else if ((dst.storage == EvqIn  && src.storage == EvqOut) ||
   6532              (dst.storage == EvqOut && src.storage == EvqIn))
   6533         dst.storage = EvqInOut;
   6534     else if ((dst.storage == EvqIn    && src.storage == EvqConst) ||
   6535              (dst.storage == EvqConst && src.storage == EvqIn))
   6536         dst.storage = EvqConstReadOnly;
   6537 
   6538     // Layout qualifiers
   6539     mergeObjectLayoutQualifiers(dst, src, false);
   6540 
   6541     // individual qualifiers
   6542     bool repeated = false;
   6543 #define MERGE_SINGLETON(field) repeated |= dst.field && src.field; dst.field |= src.field;
   6544     MERGE_SINGLETON(invariant);
   6545     MERGE_SINGLETON(noContraction);
   6546     MERGE_SINGLETON(centroid);
   6547     MERGE_SINGLETON(smooth);
   6548     MERGE_SINGLETON(flat);
   6549     MERGE_SINGLETON(nopersp);
   6550     MERGE_SINGLETON(patch);
   6551     MERGE_SINGLETON(sample);
   6552     MERGE_SINGLETON(coherent);
   6553     MERGE_SINGLETON(volatil);
   6554     MERGE_SINGLETON(restrict);
   6555     MERGE_SINGLETON(readonly);
   6556     MERGE_SINGLETON(writeonly);
   6557     MERGE_SINGLETON(specConstant);
   6558     MERGE_SINGLETON(nonUniform);
   6559 }
   6560 
   6561 // used to flatten the sampler type space into a single dimension
   6562 // correlates with the declaration of defaultSamplerPrecision[]
   6563 int HlslParseContext::computeSamplerTypeIndex(TSampler& sampler)
   6564 {
   6565     int arrayIndex = sampler.arrayed ? 1 : 0;
   6566     int shadowIndex = sampler.shadow ? 1 : 0;
   6567     int externalIndex = sampler.external ? 1 : 0;
   6568 
   6569     return EsdNumDims *
   6570            (EbtNumTypes * (2 * (2 * arrayIndex + shadowIndex) + externalIndex) + sampler.type) + sampler.dim;
   6571 }
   6572 
   6573 //
   6574 // Do size checking for an array type's size.
   6575 //
   6576 void HlslParseContext::arraySizeCheck(const TSourceLoc& loc, TIntermTyped* expr, TArraySize& sizePair)
   6577 {
   6578     bool isConst = false;
   6579     sizePair.size = 1;
   6580     sizePair.node = nullptr;
   6581 
   6582     TIntermConstantUnion* constant = expr->getAsConstantUnion();
   6583     if (constant) {
   6584         // handle true (non-specialization) constant
   6585         sizePair.size = constant->getConstArray()[0].getIConst();
   6586         isConst = true;
   6587     } else {
   6588         // see if it's a specialization constant instead
   6589         if (expr->getQualifier().isSpecConstant()) {
   6590             isConst = true;
   6591             sizePair.node = expr;
   6592             TIntermSymbol* symbol = expr->getAsSymbolNode();
   6593             if (symbol && symbol->getConstArray().size() > 0)
   6594                 sizePair.size = symbol->getConstArray()[0].getIConst();
   6595         }
   6596     }
   6597 
   6598     if (! isConst || (expr->getBasicType() != EbtInt && expr->getBasicType() != EbtUint)) {
   6599         error(loc, "array size must be a constant integer expression", "", "");
   6600         return;
   6601     }
   6602 
   6603     if (sizePair.size <= 0) {
   6604         error(loc, "array size must be a positive integer", "", "");
   6605         return;
   6606     }
   6607 }
   6608 
   6609 //
   6610 // Require array to be completely sized
   6611 //
   6612 void HlslParseContext::arraySizeRequiredCheck(const TSourceLoc& loc, const TArraySizes& arraySizes)
   6613 {
   6614     if (arraySizes.hasUnsized())
   6615         error(loc, "array size required", "", "");
   6616 }
   6617 
   6618 void HlslParseContext::structArrayCheck(const TSourceLoc& /*loc*/, const TType& type)
   6619 {
   6620     const TTypeList& structure = *type.getStruct();
   6621     for (int m = 0; m < (int)structure.size(); ++m) {
   6622         const TType& member = *structure[m].type;
   6623         if (member.isArray())
   6624             arraySizeRequiredCheck(structure[m].loc, *member.getArraySizes());
   6625     }
   6626 }
   6627 
   6628 //
   6629 // Do all the semantic checking for declaring or redeclaring an array, with and
   6630 // without a size, and make the right changes to the symbol table.
   6631 //
   6632 void HlslParseContext::declareArray(const TSourceLoc& loc, const TString& identifier, const TType& type,
   6633                                     TSymbol*& symbol, bool track)
   6634 {
   6635     if (symbol == nullptr) {
   6636         bool currentScope;
   6637         symbol = symbolTable.find(identifier, nullptr, &currentScope);
   6638 
   6639         if (symbol && builtInName(identifier) && ! symbolTable.atBuiltInLevel()) {
   6640             // bad shader (errors already reported) trying to redeclare a built-in name as an array
   6641             return;
   6642         }
   6643         if (symbol == nullptr || ! currentScope) {
   6644             //
   6645             // Successfully process a new definition.
   6646             // (Redeclarations have to take place at the same scope; otherwise they are hiding declarations)
   6647             //
   6648             symbol = new TVariable(&identifier, type);
   6649             symbolTable.insert(*symbol);
   6650             if (track && symbolTable.atGlobalLevel())
   6651                 trackLinkage(*symbol);
   6652 
   6653             return;
   6654         }
   6655         if (symbol->getAsAnonMember()) {
   6656             error(loc, "cannot redeclare a user-block member array", identifier.c_str(), "");
   6657             symbol = nullptr;
   6658             return;
   6659         }
   6660     }
   6661 
   6662     //
   6663     // Process a redeclaration.
   6664     //
   6665 
   6666     if (symbol == nullptr) {
   6667         error(loc, "array variable name expected", identifier.c_str(), "");
   6668         return;
   6669     }
   6670 
   6671     // redeclareBuiltinVariable() should have already done the copyUp()
   6672     TType& existingType = symbol->getWritableType();
   6673 
   6674     if (existingType.isSizedArray()) {
   6675         // be more lenient for input arrays to geometry shaders and tessellation control outputs,
   6676         // where the redeclaration is the same size
   6677         return;
   6678     }
   6679 
   6680     existingType.updateArraySizes(type);
   6681 }
   6682 
   6683 //
   6684 // Enforce non-initializer type/qualifier rules.
   6685 //
   6686 void HlslParseContext::fixConstInit(const TSourceLoc& loc, const TString& identifier, TType& type,
   6687                                     TIntermTyped*& initializer)
   6688 {
   6689     //
   6690     // Make the qualifier make sense, given that there is an initializer.
   6691     //
   6692     if (initializer == nullptr) {
   6693         if (type.getQualifier().storage == EvqConst ||
   6694             type.getQualifier().storage == EvqConstReadOnly) {
   6695             initializer = intermediate.makeAggregate(loc);
   6696             warn(loc, "variable with qualifier 'const' not initialized; zero initializing", identifier.c_str(), "");
   6697         }
   6698     }
   6699 }
   6700 
   6701 //
   6702 // See if the identifier is a built-in symbol that can be redeclared, and if so,
   6703 // copy the symbol table's read-only built-in variable to the current
   6704 // global level, where it can be modified based on the passed in type.
   6705 //
   6706 // Returns nullptr if no redeclaration took place; meaning a normal declaration still
   6707 // needs to occur for it, not necessarily an error.
   6708 //
   6709 // Returns a redeclared and type-modified variable if a redeclared occurred.
   6710 //
   6711 TSymbol* HlslParseContext::redeclareBuiltinVariable(const TSourceLoc& /*loc*/, const TString& identifier,
   6712                                                     const TQualifier& /*qualifier*/,
   6713                                                     const TShaderQualifiers& /*publicType*/)
   6714 {
   6715     if (! builtInName(identifier) || symbolTable.atBuiltInLevel() || ! symbolTable.atGlobalLevel())
   6716         return nullptr;
   6717 
   6718     return nullptr;
   6719 }
   6720 
   6721 //
   6722 // Generate index to the array element in a structure buffer (SSBO)
   6723 //
   6724 TIntermTyped* HlslParseContext::indexStructBufferContent(const TSourceLoc& loc, TIntermTyped* buffer) const
   6725 {
   6726     // Bail out if not a struct buffer
   6727     if (buffer == nullptr || ! isStructBufferType(buffer->getType()))
   6728         return nullptr;
   6729 
   6730     // Runtime sized array is always the last element.
   6731     const TTypeList* bufferStruct = buffer->getType().getStruct();
   6732     TIntermTyped* arrayPosition = intermediate.addConstantUnion(unsigned(bufferStruct->size()-1), loc);
   6733 
   6734     TIntermTyped* argArray = intermediate.addIndex(EOpIndexDirectStruct, buffer, arrayPosition, loc);
   6735     argArray->setType(*(*bufferStruct)[bufferStruct->size()-1].type);
   6736 
   6737     return argArray;
   6738 }
   6739 
   6740 //
   6741 // IFF type is a structuredbuffer/byteaddressbuffer type, return the content
   6742 // (template) type.   E.g, StructuredBuffer<MyType> -> MyType.  Else return nullptr.
   6743 //
   6744 TType* HlslParseContext::getStructBufferContentType(const TType& type) const
   6745 {
   6746     if (type.getBasicType() != EbtBlock || type.getQualifier().storage != EvqBuffer)
   6747         return nullptr;
   6748 
   6749     const int memberCount = (int)type.getStruct()->size();
   6750     assert(memberCount > 0);
   6751 
   6752     TType* contentType = (*type.getStruct())[memberCount-1].type;
   6753 
   6754     return contentType->isUnsizedArray() ? contentType : nullptr;
   6755 }
   6756 
   6757 //
   6758 // If an existing struct buffer has a sharable type, then share it.
   6759 //
   6760 void HlslParseContext::shareStructBufferType(TType& type)
   6761 {
   6762     // PackOffset must be equivalent to share types on a per-member basis.
   6763     // Note: cannot use auto type due to recursion.  Thus, this is a std::function.
   6764     const std::function<bool(TType& lhs, TType& rhs)>
   6765     compareQualifiers = [&](TType& lhs, TType& rhs) -> bool {
   6766         if (lhs.getQualifier().layoutOffset != rhs.getQualifier().layoutOffset)
   6767             return false;
   6768 
   6769         if (lhs.isStruct() != rhs.isStruct())
   6770             return false;
   6771 
   6772         if (lhs.isStruct() && rhs.isStruct()) {
   6773             if (lhs.getStruct()->size() != rhs.getStruct()->size())
   6774                 return false;
   6775 
   6776             for (int i = 0; i < int(lhs.getStruct()->size()); ++i)
   6777                 if (!compareQualifiers(*(*lhs.getStruct())[i].type, *(*rhs.getStruct())[i].type))
   6778                     return false;
   6779         }
   6780 
   6781         return true;
   6782     };
   6783 
   6784     // We need to compare certain qualifiers in addition to the type.
   6785     const auto typeEqual = [compareQualifiers](TType& lhs, TType& rhs) -> bool {
   6786         if (lhs.getQualifier().readonly != rhs.getQualifier().readonly)
   6787             return false;
   6788 
   6789         // If both are structures, recursively look for packOffset equality
   6790         // as well as type equality.
   6791         return compareQualifiers(lhs, rhs) && lhs == rhs;
   6792     };
   6793 
   6794     // This is an exhaustive O(N) search, but real world shaders have
   6795     // only a small number of these.
   6796     for (int idx = 0; idx < int(structBufferTypes.size()); ++idx) {
   6797         // If the deep structure matches, modulo qualifiers, use it
   6798         if (typeEqual(*structBufferTypes[idx], type)) {
   6799             type.shallowCopy(*structBufferTypes[idx]);
   6800             return;
   6801         }
   6802     }
   6803 
   6804     // Otherwise, remember it:
   6805     TType* typeCopy = new TType;
   6806     typeCopy->shallowCopy(type);
   6807     structBufferTypes.push_back(typeCopy);
   6808 }
   6809 
   6810 void HlslParseContext::paramFix(TType& type)
   6811 {
   6812     switch (type.getQualifier().storage) {
   6813     case EvqConst:
   6814         type.getQualifier().storage = EvqConstReadOnly;
   6815         break;
   6816     case EvqGlobal:
   6817     case EvqUniform:
   6818     case EvqTemporary:
   6819         type.getQualifier().storage = EvqIn;
   6820         break;
   6821     case EvqBuffer:
   6822         {
   6823             // SSBO parameter.  These do not go through the declareBlock path since they are fn parameters.
   6824             correctUniform(type.getQualifier());
   6825             TQualifier bufferQualifier = globalBufferDefaults;
   6826             mergeObjectLayoutQualifiers(bufferQualifier, type.getQualifier(), true);
   6827             bufferQualifier.storage = type.getQualifier().storage;
   6828             bufferQualifier.readonly = type.getQualifier().readonly;
   6829             bufferQualifier.coherent = type.getQualifier().coherent;
   6830             bufferQualifier.declaredBuiltIn = type.getQualifier().declaredBuiltIn;
   6831             type.getQualifier() = bufferQualifier;
   6832             break;
   6833         }
   6834     default:
   6835         break;
   6836     }
   6837 }
   6838 
   6839 void HlslParseContext::specializationCheck(const TSourceLoc& loc, const TType& type, const char* op)
   6840 {
   6841     if (type.containsSpecializationSize())
   6842         error(loc, "can't use with types containing arrays sized with a specialization constant", op, "");
   6843 }
   6844 
   6845 //
   6846 // Layout qualifier stuff.
   6847 //
   6848 
   6849 // Put the id's layout qualification into the public type, for qualifiers not having a number set.
   6850 // This is before we know any type information for error checking.
   6851 void HlslParseContext::setLayoutQualifier(const TSourceLoc& loc, TQualifier& qualifier, TString& id)
   6852 {
   6853     std::transform(id.begin(), id.end(), id.begin(), ::tolower);
   6854 
   6855     if (id == TQualifier::getLayoutMatrixString(ElmColumnMajor)) {
   6856         qualifier.layoutMatrix = ElmRowMajor;
   6857         return;
   6858     }
   6859     if (id == TQualifier::getLayoutMatrixString(ElmRowMajor)) {
   6860         qualifier.layoutMatrix = ElmColumnMajor;
   6861         return;
   6862     }
   6863     if (id == "push_constant") {
   6864         requireVulkan(loc, "push_constant");
   6865         qualifier.layoutPushConstant = true;
   6866         return;
   6867     }
   6868     if (language == EShLangGeometry || language == EShLangTessEvaluation) {
   6869         if (id == TQualifier::getGeometryString(ElgTriangles)) {
   6870             // publicType.shaderQualifiers.geometry = ElgTriangles;
   6871             warn(loc, "ignored", id.c_str(), "");
   6872             return;
   6873         }
   6874         if (language == EShLangGeometry) {
   6875             if (id == TQualifier::getGeometryString(ElgPoints)) {
   6876                 // publicType.shaderQualifiers.geometry = ElgPoints;
   6877                 warn(loc, "ignored", id.c_str(), "");
   6878                 return;
   6879             }
   6880             if (id == TQualifier::getGeometryString(ElgLineStrip)) {
   6881                 // publicType.shaderQualifiers.geometry = ElgLineStrip;
   6882                 warn(loc, "ignored", id.c_str(), "");
   6883                 return;
   6884             }
   6885             if (id == TQualifier::getGeometryString(ElgLines)) {
   6886                 // publicType.shaderQualifiers.geometry = ElgLines;
   6887                 warn(loc, "ignored", id.c_str(), "");
   6888                 return;
   6889             }
   6890             if (id == TQualifier::getGeometryString(ElgLinesAdjacency)) {
   6891                 // publicType.shaderQualifiers.geometry = ElgLinesAdjacency;
   6892                 warn(loc, "ignored", id.c_str(), "");
   6893                 return;
   6894             }
   6895             if (id == TQualifier::getGeometryString(ElgTrianglesAdjacency)) {
   6896                 // publicType.shaderQualifiers.geometry = ElgTrianglesAdjacency;
   6897                 warn(loc, "ignored", id.c_str(), "");
   6898                 return;
   6899             }
   6900             if (id == TQualifier::getGeometryString(ElgTriangleStrip)) {
   6901                 // publicType.shaderQualifiers.geometry = ElgTriangleStrip;
   6902                 warn(loc, "ignored", id.c_str(), "");
   6903                 return;
   6904             }
   6905         } else {
   6906             assert(language == EShLangTessEvaluation);
   6907 
   6908             // input primitive
   6909             if (id == TQualifier::getGeometryString(ElgTriangles)) {
   6910                 // publicType.shaderQualifiers.geometry = ElgTriangles;
   6911                 warn(loc, "ignored", id.c_str(), "");
   6912                 return;
   6913             }
   6914             if (id == TQualifier::getGeometryString(ElgQuads)) {
   6915                 // publicType.shaderQualifiers.geometry = ElgQuads;
   6916                 warn(loc, "ignored", id.c_str(), "");
   6917                 return;
   6918             }
   6919             if (id == TQualifier::getGeometryString(ElgIsolines)) {
   6920                 // publicType.shaderQualifiers.geometry = ElgIsolines;
   6921                 warn(loc, "ignored", id.c_str(), "");
   6922                 return;
   6923             }
   6924 
   6925             // vertex spacing
   6926             if (id == TQualifier::getVertexSpacingString(EvsEqual)) {
   6927                 // publicType.shaderQualifiers.spacing = EvsEqual;
   6928                 warn(loc, "ignored", id.c_str(), "");
   6929                 return;
   6930             }
   6931             if (id == TQualifier::getVertexSpacingString(EvsFractionalEven)) {
   6932                 // publicType.shaderQualifiers.spacing = EvsFractionalEven;
   6933                 warn(loc, "ignored", id.c_str(), "");
   6934                 return;
   6935             }
   6936             if (id == TQualifier::getVertexSpacingString(EvsFractionalOdd)) {
   6937                 // publicType.shaderQualifiers.spacing = EvsFractionalOdd;
   6938                 warn(loc, "ignored", id.c_str(), "");
   6939                 return;
   6940             }
   6941 
   6942             // triangle order
   6943             if (id == TQualifier::getVertexOrderString(EvoCw)) {
   6944                 // publicType.shaderQualifiers.order = EvoCw;
   6945                 warn(loc, "ignored", id.c_str(), "");
   6946                 return;
   6947             }
   6948             if (id == TQualifier::getVertexOrderString(EvoCcw)) {
   6949                 // publicType.shaderQualifiers.order = EvoCcw;
   6950                 warn(loc, "ignored", id.c_str(), "");
   6951                 return;
   6952             }
   6953 
   6954             // point mode
   6955             if (id == "point_mode") {
   6956                 // publicType.shaderQualifiers.pointMode = true;
   6957                 warn(loc, "ignored", id.c_str(), "");
   6958                 return;
   6959             }
   6960         }
   6961     }
   6962     if (language == EShLangFragment) {
   6963         if (id == "origin_upper_left") {
   6964             // publicType.shaderQualifiers.originUpperLeft = true;
   6965             warn(loc, "ignored", id.c_str(), "");
   6966             return;
   6967         }
   6968         if (id == "pixel_center_integer") {
   6969             // publicType.shaderQualifiers.pixelCenterInteger = true;
   6970             warn(loc, "ignored", id.c_str(), "");
   6971             return;
   6972         }
   6973         if (id == "early_fragment_tests") {
   6974             // publicType.shaderQualifiers.earlyFragmentTests = true;
   6975             warn(loc, "ignored", id.c_str(), "");
   6976             return;
   6977         }
   6978         for (TLayoutDepth depth = (TLayoutDepth)(EldNone + 1); depth < EldCount; depth = (TLayoutDepth)(depth + 1)) {
   6979             if (id == TQualifier::getLayoutDepthString(depth)) {
   6980                 // publicType.shaderQualifiers.layoutDepth = depth;
   6981                 warn(loc, "ignored", id.c_str(), "");
   6982                 return;
   6983             }
   6984         }
   6985         if (id.compare(0, 13, "blend_support") == 0) {
   6986             bool found = false;
   6987             for (TBlendEquationShift be = (TBlendEquationShift)0; be < EBlendCount; be = (TBlendEquationShift)(be + 1)) {
   6988                 if (id == TQualifier::getBlendEquationString(be)) {
   6989                     requireExtensions(loc, 1, &E_GL_KHR_blend_equation_advanced, "blend equation");
   6990                     intermediate.addBlendEquation(be);
   6991                     // publicType.shaderQualifiers.blendEquation = true;
   6992                     warn(loc, "ignored", id.c_str(), "");
   6993                     found = true;
   6994                     break;
   6995                 }
   6996             }
   6997             if (! found)
   6998                 error(loc, "unknown blend equation", "blend_support", "");
   6999             return;
   7000         }
   7001     }
   7002     error(loc, "unrecognized layout identifier, or qualifier requires assignment (e.g., binding = 4)", id.c_str(), "");
   7003 }
   7004 
   7005 // Put the id's layout qualifier value into the public type, for qualifiers having a number set.
   7006 // This is before we know any type information for error checking.
   7007 void HlslParseContext::setLayoutQualifier(const TSourceLoc& loc, TQualifier& qualifier, TString& id,
   7008                                           const TIntermTyped* node)
   7009 {
   7010     const char* feature = "layout-id value";
   7011     // const char* nonLiteralFeature = "non-literal layout-id value";
   7012 
   7013     integerCheck(node, feature);
   7014     const TIntermConstantUnion* constUnion = node->getAsConstantUnion();
   7015     int value = 0;
   7016     if (constUnion) {
   7017         value = constUnion->getConstArray()[0].getIConst();
   7018     }
   7019 
   7020     std::transform(id.begin(), id.end(), id.begin(), ::tolower);
   7021 
   7022     if (id == "offset") {
   7023         qualifier.layoutOffset = value;
   7024         return;
   7025     } else if (id == "align") {
   7026         // "The specified alignment must be a power of 2, or a compile-time error results."
   7027         if (! IsPow2(value))
   7028             error(loc, "must be a power of 2", "align", "");
   7029         else
   7030             qualifier.layoutAlign = value;
   7031         return;
   7032     } else if (id == "location") {
   7033         if ((unsigned int)value >= TQualifier::layoutLocationEnd)
   7034             error(loc, "location is too large", id.c_str(), "");
   7035         else
   7036             qualifier.layoutLocation = value;
   7037         return;
   7038     } else if (id == "set") {
   7039         if ((unsigned int)value >= TQualifier::layoutSetEnd)
   7040             error(loc, "set is too large", id.c_str(), "");
   7041         else
   7042             qualifier.layoutSet = value;
   7043         return;
   7044     } else if (id == "binding") {
   7045         if ((unsigned int)value >= TQualifier::layoutBindingEnd)
   7046             error(loc, "binding is too large", id.c_str(), "");
   7047         else
   7048             qualifier.layoutBinding = value;
   7049         return;
   7050     } else if (id == "component") {
   7051         if ((unsigned)value >= TQualifier::layoutComponentEnd)
   7052             error(loc, "component is too large", id.c_str(), "");
   7053         else
   7054             qualifier.layoutComponent = value;
   7055         return;
   7056     } else if (id.compare(0, 4, "xfb_") == 0) {
   7057         // "Any shader making any static use (after preprocessing) of any of these
   7058         // *xfb_* qualifiers will cause the shader to be in a transform feedback
   7059         // capturing mode and hence responsible for describing the transform feedback
   7060         // setup."
   7061         intermediate.setXfbMode();
   7062         if (id == "xfb_buffer") {
   7063             // "It is a compile-time error to specify an *xfb_buffer* that is greater than
   7064             // the implementation-dependent constant gl_MaxTransformFeedbackBuffers."
   7065             if (value >= resources.maxTransformFeedbackBuffers)
   7066                 error(loc, "buffer is too large:", id.c_str(), "gl_MaxTransformFeedbackBuffers is %d",
   7067                       resources.maxTransformFeedbackBuffers);
   7068             if (value >= (int)TQualifier::layoutXfbBufferEnd)
   7069                 error(loc, "buffer is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbBufferEnd - 1);
   7070             else
   7071                 qualifier.layoutXfbBuffer = value;
   7072             return;
   7073         } else if (id == "xfb_offset") {
   7074             if (value >= (int)TQualifier::layoutXfbOffsetEnd)
   7075                 error(loc, "offset is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbOffsetEnd - 1);
   7076             else
   7077                 qualifier.layoutXfbOffset = value;
   7078             return;
   7079         } else if (id == "xfb_stride") {
   7080             // "The resulting stride (implicit or explicit), when divided by 4, must be less than or equal to the
   7081             // implementation-dependent constant gl_MaxTransformFeedbackInterleavedComponents."
   7082             if (value > 4 * resources.maxTransformFeedbackInterleavedComponents)
   7083                 error(loc, "1/4 stride is too large:", id.c_str(), "gl_MaxTransformFeedbackInterleavedComponents is %d",
   7084                       resources.maxTransformFeedbackInterleavedComponents);
   7085             else if (value >= (int)TQualifier::layoutXfbStrideEnd)
   7086                 error(loc, "stride is too large:", id.c_str(), "internal max is %d", TQualifier::layoutXfbStrideEnd - 1);
   7087             if (value < (int)TQualifier::layoutXfbStrideEnd)
   7088                 qualifier.layoutXfbStride = value;
   7089             return;
   7090         }
   7091     }
   7092 
   7093     if (id == "input_attachment_index") {
   7094         requireVulkan(loc, "input_attachment_index");
   7095         if (value >= (int)TQualifier::layoutAttachmentEnd)
   7096             error(loc, "attachment index is too large", id.c_str(), "");
   7097         else
   7098             qualifier.layoutAttachment = value;
   7099         return;
   7100     }
   7101     if (id == "constant_id") {
   7102         setSpecConstantId(loc, qualifier, value);
   7103         return;
   7104     }
   7105 
   7106     switch (language) {
   7107     case EShLangVertex:
   7108         break;
   7109 
   7110     case EShLangTessControl:
   7111         if (id == "vertices") {
   7112             if (value == 0)
   7113                 error(loc, "must be greater than 0", "vertices", "");
   7114             else
   7115                 // publicType.shaderQualifiers.vertices = value;
   7116                 warn(loc, "ignored", id.c_str(), "");
   7117             return;
   7118         }
   7119         break;
   7120 
   7121     case EShLangTessEvaluation:
   7122         break;
   7123 
   7124     case EShLangGeometry:
   7125         if (id == "invocations") {
   7126             if (value == 0)
   7127                 error(loc, "must be at least 1", "invocations", "");
   7128             else
   7129                 // publicType.shaderQualifiers.invocations = value;
   7130                 warn(loc, "ignored", id.c_str(), "");
   7131             return;
   7132         }
   7133         if (id == "max_vertices") {
   7134             // publicType.shaderQualifiers.vertices = value;
   7135             warn(loc, "ignored", id.c_str(), "");
   7136             if (value > resources.maxGeometryOutputVertices)
   7137                 error(loc, "too large, must be less than gl_MaxGeometryOutputVertices", "max_vertices", "");
   7138             return;
   7139         }
   7140         if (id == "stream") {
   7141             qualifier.layoutStream = value;
   7142             return;
   7143         }
   7144         break;
   7145 
   7146     case EShLangFragment:
   7147         if (id == "index") {
   7148             qualifier.layoutIndex = value;
   7149             return;
   7150         }
   7151         break;
   7152 
   7153     case EShLangCompute:
   7154         if (id.compare(0, 11, "local_size_") == 0) {
   7155             if (id == "local_size_x") {
   7156                 // publicType.shaderQualifiers.localSize[0] = value;
   7157                 warn(loc, "ignored", id.c_str(), "");
   7158                 return;
   7159             }
   7160             if (id == "local_size_y") {
   7161                 // publicType.shaderQualifiers.localSize[1] = value;
   7162                 warn(loc, "ignored", id.c_str(), "");
   7163                 return;
   7164             }
   7165             if (id == "local_size_z") {
   7166                 // publicType.shaderQualifiers.localSize[2] = value;
   7167                 warn(loc, "ignored", id.c_str(), "");
   7168                 return;
   7169             }
   7170             if (spvVersion.spv != 0) {
   7171                 if (id == "local_size_x_id") {
   7172                     // publicType.shaderQualifiers.localSizeSpecId[0] = value;
   7173                     warn(loc, "ignored", id.c_str(), "");
   7174                     return;
   7175                 }
   7176                 if (id == "local_size_y_id") {
   7177                     // publicType.shaderQualifiers.localSizeSpecId[1] = value;
   7178                     warn(loc, "ignored", id.c_str(), "");
   7179                     return;
   7180                 }
   7181                 if (id == "local_size_z_id") {
   7182                     // publicType.shaderQualifiers.localSizeSpecId[2] = value;
   7183                     warn(loc, "ignored", id.c_str(), "");
   7184                     return;
   7185                 }
   7186             }
   7187         }
   7188         break;
   7189 
   7190     default:
   7191         break;
   7192     }
   7193 
   7194     error(loc, "there is no such layout identifier for this stage taking an assigned value", id.c_str(), "");
   7195 }
   7196 
   7197 void HlslParseContext::setSpecConstantId(const TSourceLoc& loc, TQualifier& qualifier, int value)
   7198 {
   7199     if (value >= (int)TQualifier::layoutSpecConstantIdEnd) {
   7200         error(loc, "specialization-constant id is too large", "constant_id", "");
   7201     } else {
   7202         qualifier.layoutSpecConstantId = value;
   7203         qualifier.specConstant = true;
   7204         if (! intermediate.addUsedConstantId(value))
   7205             error(loc, "specialization-constant id already used", "constant_id", "");
   7206     }
   7207     return;
   7208 }
   7209 
   7210 // Merge any layout qualifier information from src into dst, leaving everything else in dst alone
   7211 //
   7212 // "More than one layout qualifier may appear in a single declaration.
   7213 // Additionally, the same layout-qualifier-name can occur multiple times
   7214 // within a layout qualifier or across multiple layout qualifiers in the
   7215 // same declaration. When the same layout-qualifier-name occurs
   7216 // multiple times, in a single declaration, the last occurrence overrides
   7217 // the former occurrence(s).  Further, if such a layout-qualifier-name
   7218 // will effect subsequent declarations or other observable behavior, it
   7219 // is only the last occurrence that will have any effect, behaving as if
   7220 // the earlier occurrence(s) within the declaration are not present.
   7221 // This is also true for overriding layout-qualifier-names, where one
   7222 // overrides the other (e.g., row_major vs. column_major); only the last
   7223 // occurrence has any effect."
   7224 //
   7225 void HlslParseContext::mergeObjectLayoutQualifiers(TQualifier& dst, const TQualifier& src, bool inheritOnly)
   7226 {
   7227     if (src.hasMatrix())
   7228         dst.layoutMatrix = src.layoutMatrix;
   7229     if (src.hasPacking())
   7230         dst.layoutPacking = src.layoutPacking;
   7231 
   7232     if (src.hasStream())
   7233         dst.layoutStream = src.layoutStream;
   7234 
   7235     if (src.hasFormat())
   7236         dst.layoutFormat = src.layoutFormat;
   7237 
   7238     if (src.hasXfbBuffer())
   7239         dst.layoutXfbBuffer = src.layoutXfbBuffer;
   7240 
   7241     if (src.hasAlign())
   7242         dst.layoutAlign = src.layoutAlign;
   7243 
   7244     if (! inheritOnly) {
   7245         if (src.hasLocation())
   7246             dst.layoutLocation = src.layoutLocation;
   7247         if (src.hasComponent())
   7248             dst.layoutComponent = src.layoutComponent;
   7249         if (src.hasIndex())
   7250             dst.layoutIndex = src.layoutIndex;
   7251 
   7252         if (src.hasOffset())
   7253             dst.layoutOffset = src.layoutOffset;
   7254 
   7255         if (src.hasSet())
   7256             dst.layoutSet = src.layoutSet;
   7257         if (src.layoutBinding != TQualifier::layoutBindingEnd)
   7258             dst.layoutBinding = src.layoutBinding;
   7259 
   7260         if (src.hasXfbStride())
   7261             dst.layoutXfbStride = src.layoutXfbStride;
   7262         if (src.hasXfbOffset())
   7263             dst.layoutXfbOffset = src.layoutXfbOffset;
   7264         if (src.hasAttachment())
   7265             dst.layoutAttachment = src.layoutAttachment;
   7266         if (src.hasSpecConstantId())
   7267             dst.layoutSpecConstantId = src.layoutSpecConstantId;
   7268 
   7269         if (src.layoutPushConstant)
   7270             dst.layoutPushConstant = true;
   7271     }
   7272 }
   7273 
   7274 
   7275 //
   7276 // Look up a function name in the symbol table, and make sure it is a function.
   7277 //
   7278 // First, look for an exact match.  If there is none, use the generic selector
   7279 // TParseContextBase::selectFunction() to find one, parameterized by the
   7280 // convertible() and better() predicates defined below.
   7281 //
   7282 // Return the function symbol if found, otherwise nullptr.
   7283 //
   7284 const TFunction* HlslParseContext::findFunction(const TSourceLoc& loc, TFunction& call, bool& builtIn, int& thisDepth,
   7285                                                 TIntermTyped*& args)
   7286 {
   7287     if (symbolTable.isFunctionNameVariable(call.getName())) {
   7288         error(loc, "can't use function syntax on variable", call.getName().c_str(), "");
   7289         return nullptr;
   7290     }
   7291 
   7292     // first, look for an exact match
   7293     bool dummyScope;
   7294     TSymbol* symbol = symbolTable.find(call.getMangledName(), &builtIn, &dummyScope, &thisDepth);
   7295     if (symbol)
   7296         return symbol->getAsFunction();
   7297 
   7298     // no exact match, use the generic selector, parameterized by the GLSL rules
   7299 
   7300     // create list of candidates to send
   7301     TVector<const TFunction*> candidateList;
   7302     symbolTable.findFunctionNameList(call.getMangledName(), candidateList, builtIn);
   7303 
   7304     // These built-in ops can accept any type, so we bypass the argument selection
   7305     if (candidateList.size() == 1 && builtIn &&
   7306         (candidateList[0]->getBuiltInOp() == EOpMethodAppend ||
   7307          candidateList[0]->getBuiltInOp() == EOpMethodRestartStrip ||
   7308          candidateList[0]->getBuiltInOp() == EOpMethodIncrementCounter ||
   7309          candidateList[0]->getBuiltInOp() == EOpMethodDecrementCounter ||
   7310          candidateList[0]->getBuiltInOp() == EOpMethodAppend ||
   7311          candidateList[0]->getBuiltInOp() == EOpMethodConsume)) {
   7312         return candidateList[0];
   7313     }
   7314 
   7315     bool allowOnlyUpConversions = true;
   7316 
   7317     // can 'from' convert to 'to'?
   7318     const auto convertible = [&](const TType& from, const TType& to, TOperator op, int arg) -> bool {
   7319         if (from == to)
   7320             return true;
   7321 
   7322         // no aggregate conversions
   7323         if (from.isArray()  || to.isArray() ||
   7324             from.isStruct() || to.isStruct())
   7325             return false;
   7326 
   7327         switch (op) {
   7328         case EOpInterlockedAdd:
   7329         case EOpInterlockedAnd:
   7330         case EOpInterlockedCompareExchange:
   7331         case EOpInterlockedCompareStore:
   7332         case EOpInterlockedExchange:
   7333         case EOpInterlockedMax:
   7334         case EOpInterlockedMin:
   7335         case EOpInterlockedOr:
   7336         case EOpInterlockedXor:
   7337             // We do not promote the texture or image type for these ocodes.  Normally that would not
   7338             // be an issue because it's a buffer, but we haven't decomposed the opcode yet, and at this
   7339             // stage it's merely e.g, a basic integer type.
   7340             //
   7341             // Instead, we want to promote other arguments, but stay within the same family.  In other
   7342             // words, InterlockedAdd(RWBuffer<int>, ...) will always use the int flavor, never the uint flavor,
   7343             // but it is allowed to promote its other arguments.
   7344             if (arg == 0)
   7345                 return false;
   7346             break;
   7347         case EOpMethodSample:
   7348         case EOpMethodSampleBias:
   7349         case EOpMethodSampleCmp:
   7350         case EOpMethodSampleCmpLevelZero:
   7351         case EOpMethodSampleGrad:
   7352         case EOpMethodSampleLevel:
   7353         case EOpMethodLoad:
   7354         case EOpMethodGetDimensions:
   7355         case EOpMethodGetSamplePosition:
   7356         case EOpMethodGather:
   7357         case EOpMethodCalculateLevelOfDetail:
   7358         case EOpMethodCalculateLevelOfDetailUnclamped:
   7359         case EOpMethodGatherRed:
   7360         case EOpMethodGatherGreen:
   7361         case EOpMethodGatherBlue:
   7362         case EOpMethodGatherAlpha:
   7363         case EOpMethodGatherCmp:
   7364         case EOpMethodGatherCmpRed:
   7365         case EOpMethodGatherCmpGreen:
   7366         case EOpMethodGatherCmpBlue:
   7367         case EOpMethodGatherCmpAlpha:
   7368         case EOpMethodAppend:
   7369         case EOpMethodRestartStrip:
   7370             // those are method calls, the object type can not be changed
   7371             // they are equal if the dim and type match (is dim sufficient?)
   7372             if (arg == 0)
   7373                 return from.getSampler().type == to.getSampler().type &&
   7374                        from.getSampler().arrayed == to.getSampler().arrayed &&
   7375                        from.getSampler().shadow == to.getSampler().shadow &&
   7376                        from.getSampler().ms == to.getSampler().ms &&
   7377                        from.getSampler().dim == to.getSampler().dim;
   7378             break;
   7379         default:
   7380             break;
   7381         }
   7382 
   7383         // basic types have to be convertible
   7384         if (allowOnlyUpConversions)
   7385             if (! intermediate.canImplicitlyPromote(from.getBasicType(), to.getBasicType(), EOpFunctionCall))
   7386                 return false;
   7387 
   7388         // shapes have to be convertible
   7389         if ((from.isScalarOrVec1() && to.isScalarOrVec1()) ||
   7390             (from.isScalarOrVec1() && to.isVector())    ||
   7391             (from.isScalarOrVec1() && to.isMatrix())    ||
   7392             (from.isVector() && to.isVector() && from.getVectorSize() >= to.getVectorSize()))
   7393             return true;
   7394 
   7395         // TODO: what are the matrix rules? they go here
   7396 
   7397         return false;
   7398     };
   7399 
   7400     // Is 'to2' a better conversion than 'to1'?
   7401     // Ties should not be considered as better.
   7402     // Assumes 'convertible' already said true.
   7403     const auto better = [](const TType& from, const TType& to1, const TType& to2) -> bool {
   7404         // exact match is always better than mismatch
   7405         if (from == to2)
   7406             return from != to1;
   7407         if (from == to1)
   7408             return false;
   7409 
   7410         // shape changes are always worse
   7411         if (from.isScalar() || from.isVector()) {
   7412             if (from.getVectorSize() == to2.getVectorSize() &&
   7413                 from.getVectorSize() != to1.getVectorSize())
   7414                 return true;
   7415             if (from.getVectorSize() == to1.getVectorSize() &&
   7416                 from.getVectorSize() != to2.getVectorSize())
   7417                 return false;
   7418         }
   7419 
   7420         // Handle sampler betterness: An exact sampler match beats a non-exact match.
   7421         // (If we just looked at basic type, all EbtSamplers would look the same).
   7422         // If any type is not a sampler, just use the linearize function below.
   7423         if (from.getBasicType() == EbtSampler && to1.getBasicType() == EbtSampler && to2.getBasicType() == EbtSampler) {
   7424             // We can ignore the vector size in the comparison.
   7425             TSampler to1Sampler = to1.getSampler();
   7426             TSampler to2Sampler = to2.getSampler();
   7427 
   7428             to1Sampler.vectorSize = to2Sampler.vectorSize = from.getSampler().vectorSize;
   7429 
   7430             if (from.getSampler() == to2Sampler)
   7431                 return from.getSampler() != to1Sampler;
   7432             if (from.getSampler() == to1Sampler)
   7433                 return false;
   7434         }
   7435 
   7436         // Might or might not be changing shape, which means basic type might
   7437         // or might not match, so within that, the question is how big a
   7438         // basic-type conversion is being done.
   7439         //
   7440         // Use a hierarchy of domains, translated to order of magnitude
   7441         // in a linearized view:
   7442         //   - floating-point vs. integer
   7443         //     - 32 vs. 64 bit (or width in general)
   7444         //       - bool vs. non bool
   7445         //         - signed vs. not signed
   7446         const auto linearize = [](const TBasicType& basicType) -> int {
   7447             switch (basicType) {
   7448             case EbtBool:     return 1;
   7449             case EbtInt:      return 10;
   7450             case EbtUint:     return 11;
   7451             case EbtInt64:    return 20;
   7452             case EbtUint64:   return 21;
   7453             case EbtFloat:    return 100;
   7454             case EbtDouble:   return 110;
   7455             default:          return 0;
   7456             }
   7457         };
   7458 
   7459         return abs(linearize(to2.getBasicType()) - linearize(from.getBasicType())) <
   7460                abs(linearize(to1.getBasicType()) - linearize(from.getBasicType()));
   7461     };
   7462 
   7463     // for ambiguity reporting
   7464     bool tie = false;
   7465 
   7466     // send to the generic selector
   7467     const TFunction* bestMatch = selectFunction(candidateList, call, convertible, better, tie);
   7468 
   7469     if (bestMatch == nullptr) {
   7470         // If there is nothing selected by allowing only up-conversions (to a larger linearize() value),
   7471         // we instead try down-conversions, which are valid in HLSL, but not preferred if there are any
   7472         // upconversions possible.
   7473         allowOnlyUpConversions = false;
   7474         bestMatch = selectFunction(candidateList, call, convertible, better, tie);
   7475     }
   7476 
   7477     if (bestMatch == nullptr) {
   7478         error(loc, "no matching overloaded function found", call.getName().c_str(), "");
   7479         return nullptr;
   7480     }
   7481 
   7482     // For built-ins, we can convert across the arguments.  This will happen in several steps:
   7483     // Step 1:  If there's an exact match, use it.
   7484     // Step 2a: Otherwise, get the operator from the best match and promote arguments:
   7485     // Step 2b: reconstruct the TFunction based on the new arg types
   7486     // Step 3:  Re-select after type promotion is applied, to find proper candidate.
   7487     if (builtIn) {
   7488         // Step 1: If there's an exact match, use it.
   7489         if (call.getMangledName() == bestMatch->getMangledName())
   7490             return bestMatch;
   7491 
   7492         // Step 2a: Otherwise, get the operator from the best match and promote arguments as if we
   7493         // are that kind of operator.
   7494         if (args != nullptr) {
   7495             // The arg list can be a unary node, or an aggregate.  We have to handle both.
   7496             // We will use the normal promote() facilities, which require an interm node.
   7497             TIntermOperator* promote = nullptr;
   7498 
   7499             if (call.getParamCount() == 1) {
   7500                 promote = new TIntermUnary(bestMatch->getBuiltInOp());
   7501                 promote->getAsUnaryNode()->setOperand(args->getAsTyped());
   7502             } else {
   7503                 promote = new TIntermAggregate(bestMatch->getBuiltInOp());
   7504                 promote->getAsAggregate()->getSequence().swap(args->getAsAggregate()->getSequence());
   7505             }
   7506 
   7507             if (! intermediate.promote(promote))
   7508                 return nullptr;
   7509 
   7510             // Obtain the promoted arg list.
   7511             if (call.getParamCount() == 1) {
   7512                 args = promote->getAsUnaryNode()->getOperand();
   7513             } else {
   7514                 promote->getAsAggregate()->getSequence().swap(args->getAsAggregate()->getSequence());
   7515             }
   7516         }
   7517 
   7518         // Step 2b: reconstruct the TFunction based on the new arg types
   7519         TFunction convertedCall(&call.getName(), call.getType(), call.getBuiltInOp());
   7520 
   7521         if (args->getAsAggregate()) {
   7522             // Handle aggregates: put all args into the new function call
   7523             for (int arg=0; arg<int(args->getAsAggregate()->getSequence().size()); ++arg) {
   7524                 // TODO: But for constness, we could avoid the new & shallowCopy, and use the pointer directly.
   7525                 TParameter param = { 0, new TType, nullptr };
   7526                 param.type->shallowCopy(args->getAsAggregate()->getSequence()[arg]->getAsTyped()->getType());
   7527                 convertedCall.addParameter(param);
   7528             }
   7529         } else if (args->getAsUnaryNode()) {
   7530             // Handle unaries: put all args into the new function call
   7531             TParameter param = { 0, new TType, nullptr };
   7532             param.type->shallowCopy(args->getAsUnaryNode()->getOperand()->getAsTyped()->getType());
   7533             convertedCall.addParameter(param);
   7534         } else if (args->getAsTyped()) {
   7535             // Handle bare e.g, floats, not in an aggregate.
   7536             TParameter param = { 0, new TType, nullptr };
   7537             param.type->shallowCopy(args->getAsTyped()->getType());
   7538             convertedCall.addParameter(param);
   7539         } else {
   7540             assert(0); // unknown argument list.
   7541             return nullptr;
   7542         }
   7543 
   7544         // Step 3: Re-select after type promotion, to find proper candidate
   7545         // send to the generic selector
   7546         bestMatch = selectFunction(candidateList, convertedCall, convertible, better, tie);
   7547 
   7548         // At this point, there should be no tie.
   7549     }
   7550 
   7551     if (tie)
   7552         error(loc, "ambiguous best function under implicit type conversion", call.getName().c_str(), "");
   7553 
   7554     // Append default parameter values if needed
   7555     if (!tie && bestMatch != nullptr) {
   7556         for (int defParam = call.getParamCount(); defParam < bestMatch->getParamCount(); ++defParam) {
   7557             handleFunctionArgument(&call, args, (*bestMatch)[defParam].defaultValue);
   7558         }
   7559     }
   7560 
   7561     return bestMatch;
   7562 }
   7563 
   7564 //
   7565 // Do everything necessary to handle a typedef declaration, for a single symbol.
   7566 //
   7567 // 'parseType' is the type part of the declaration (to the left)
   7568 // 'arraySizes' is the arrayness tagged on the identifier (to the right)
   7569 //
   7570 void HlslParseContext::declareTypedef(const TSourceLoc& loc, const TString& identifier, const TType& parseType)
   7571 {
   7572     TVariable* typeSymbol = new TVariable(&identifier, parseType, true);
   7573     if (! symbolTable.insert(*typeSymbol))
   7574         error(loc, "name already defined", "typedef", identifier.c_str());
   7575 }
   7576 
   7577 // Do everything necessary to handle a struct declaration, including
   7578 // making IO aliases because HLSL allows mixed IO in a struct that specializes
   7579 // based on the usage (input, output, uniform, none).
   7580 void HlslParseContext::declareStruct(const TSourceLoc& loc, TString& structName, TType& type)
   7581 {
   7582     // If it was named, which means the type can be reused later, add
   7583     // it to the symbol table.  (Unless it's a block, in which
   7584     // case the name is not a type.)
   7585     if (type.getBasicType() == EbtBlock || structName.size() == 0)
   7586         return;
   7587 
   7588     TVariable* userTypeDef = new TVariable(&structName, type, true);
   7589     if (! symbolTable.insert(*userTypeDef)) {
   7590         error(loc, "redefinition", structName.c_str(), "struct");
   7591         return;
   7592     }
   7593 
   7594     // See if we need IO aliases for the structure typeList
   7595 
   7596     const auto condAlloc = [](bool pred, TTypeList*& list) {
   7597         if (pred && list == nullptr)
   7598             list = new TTypeList;
   7599     };
   7600 
   7601     tIoKinds newLists = { nullptr, nullptr, nullptr }; // allocate for each kind found
   7602     for (auto member = type.getStruct()->begin(); member != type.getStruct()->end(); ++member) {
   7603         condAlloc(hasUniform(member->type->getQualifier()), newLists.uniform);
   7604         condAlloc(  hasInput(member->type->getQualifier()), newLists.input);
   7605         condAlloc( hasOutput(member->type->getQualifier()), newLists.output);
   7606 
   7607         if (member->type->isStruct()) {
   7608             auto it = ioTypeMap.find(member->type->getStruct());
   7609             if (it != ioTypeMap.end()) {
   7610                 condAlloc(it->second.uniform != nullptr, newLists.uniform);
   7611                 condAlloc(it->second.input   != nullptr, newLists.input);
   7612                 condAlloc(it->second.output  != nullptr, newLists.output);
   7613             }
   7614         }
   7615     }
   7616     if (newLists.uniform == nullptr &&
   7617         newLists.input   == nullptr &&
   7618         newLists.output  == nullptr) {
   7619         // Won't do any IO caching, clear up the type and get out now.
   7620         for (auto member = type.getStruct()->begin(); member != type.getStruct()->end(); ++member)
   7621             clearUniformInputOutput(member->type->getQualifier());
   7622         return;
   7623     }
   7624 
   7625     // We have IO involved.
   7626 
   7627     // Make a pure typeList for the symbol table, and cache side copies of IO versions.
   7628     for (auto member = type.getStruct()->begin(); member != type.getStruct()->end(); ++member) {
   7629         const auto inheritStruct = [&](TTypeList* s, TTypeLoc& ioMember) {
   7630             if (s != nullptr) {
   7631                 ioMember.type = new TType;
   7632                 ioMember.type->shallowCopy(*member->type);
   7633                 ioMember.type->setStruct(s);
   7634             }
   7635         };
   7636         const auto newMember = [&](TTypeLoc& m) {
   7637             if (m.type == nullptr) {
   7638                 m.type = new TType;
   7639                 m.type->shallowCopy(*member->type);
   7640             }
   7641         };
   7642 
   7643         TTypeLoc newUniformMember = { nullptr, member->loc };
   7644         TTypeLoc newInputMember   = { nullptr, member->loc };
   7645         TTypeLoc newOutputMember  = { nullptr, member->loc };
   7646         if (member->type->isStruct()) {
   7647             // swap in an IO child if there is one
   7648             auto it = ioTypeMap.find(member->type->getStruct());
   7649             if (it != ioTypeMap.end()) {
   7650                 inheritStruct(it->second.uniform, newUniformMember);
   7651                 inheritStruct(it->second.input,   newInputMember);
   7652                 inheritStruct(it->second.output,  newOutputMember);
   7653             }
   7654         }
   7655         if (newLists.uniform) {
   7656             newMember(newUniformMember);
   7657 
   7658             // inherit default matrix layout (changeable via #pragma pack_matrix), if none given.
   7659             if (member->type->isMatrix() && member->type->getQualifier().layoutMatrix == ElmNone)
   7660                 newUniformMember.type->getQualifier().layoutMatrix = globalUniformDefaults.layoutMatrix;
   7661 
   7662             correctUniform(newUniformMember.type->getQualifier());
   7663             newLists.uniform->push_back(newUniformMember);
   7664         }
   7665         if (newLists.input) {
   7666             newMember(newInputMember);
   7667             correctInput(newInputMember.type->getQualifier());
   7668             newLists.input->push_back(newInputMember);
   7669         }
   7670         if (newLists.output) {
   7671             newMember(newOutputMember);
   7672             correctOutput(newOutputMember.type->getQualifier());
   7673             newLists.output->push_back(newOutputMember);
   7674         }
   7675 
   7676         // make original pure
   7677         clearUniformInputOutput(member->type->getQualifier());
   7678     }
   7679     ioTypeMap[type.getStruct()] = newLists;
   7680 }
   7681 
   7682 // Lookup a user-type by name.
   7683 // If found, fill in the type and return the defining symbol.
   7684 // If not found, return nullptr.
   7685 TSymbol* HlslParseContext::lookupUserType(const TString& typeName, TType& type)
   7686 {
   7687     TSymbol* symbol = symbolTable.find(typeName);
   7688     if (symbol && symbol->getAsVariable() && symbol->getAsVariable()->isUserType()) {
   7689         type.shallowCopy(symbol->getType());
   7690         return symbol;
   7691     } else
   7692         return nullptr;
   7693 }
   7694 
   7695 //
   7696 // Do everything necessary to handle a variable (non-block) declaration.
   7697 // Either redeclaring a variable, or making a new one, updating the symbol
   7698 // table, and all error checking.
   7699 //
   7700 // Returns a subtree node that computes an initializer, if needed.
   7701 // Returns nullptr if there is no code to execute for initialization.
   7702 //
   7703 // 'parseType' is the type part of the declaration (to the left)
   7704 // 'arraySizes' is the arrayness tagged on the identifier (to the right)
   7705 //
   7706 TIntermNode* HlslParseContext::declareVariable(const TSourceLoc& loc, const TString& identifier, TType& type,
   7707                                                TIntermTyped* initializer)
   7708 {
   7709     if (voidErrorCheck(loc, identifier, type.getBasicType()))
   7710         return nullptr;
   7711 
   7712     // Global consts with initializers that are non-const act like EvqGlobal in HLSL.
   7713     // This test is implicitly recursive, because initializers propagate constness
   7714     // up the aggregate node tree during creation.  E.g, for:
   7715     //    { { 1, 2 }, { 3, 4 } }
   7716     // the initializer list is marked EvqConst at the top node, and remains so here.  However:
   7717     //    { 1, { myvar, 2 }, 3 }
   7718     // is not a const intializer, and still becomes EvqGlobal here.
   7719 
   7720     const bool nonConstInitializer = (initializer != nullptr && initializer->getQualifier().storage != EvqConst);
   7721 
   7722     if (type.getQualifier().storage == EvqConst && symbolTable.atGlobalLevel() && nonConstInitializer) {
   7723         // Force to global
   7724         type.getQualifier().storage = EvqGlobal;
   7725     }
   7726 
   7727     // make const and initialization consistent
   7728     fixConstInit(loc, identifier, type, initializer);
   7729 
   7730     // Check for redeclaration of built-ins and/or attempting to declare a reserved name
   7731     TSymbol* symbol = nullptr;
   7732 
   7733     inheritGlobalDefaults(type.getQualifier());
   7734 
   7735     const bool flattenVar = shouldFlatten(type, type.getQualifier().storage, true);
   7736 
   7737     // correct IO in the type
   7738     switch (type.getQualifier().storage) {
   7739     case EvqGlobal:
   7740     case EvqTemporary:
   7741         clearUniformInputOutput(type.getQualifier());
   7742         break;
   7743     case EvqUniform:
   7744     case EvqBuffer:
   7745         correctUniform(type.getQualifier());
   7746         if (type.isStruct()) {
   7747             auto it = ioTypeMap.find(type.getStruct());
   7748             if (it != ioTypeMap.end())
   7749                 type.setStruct(it->second.uniform);
   7750         }
   7751 
   7752         break;
   7753     default:
   7754         break;
   7755     }
   7756 
   7757     // Declare the variable
   7758     if (type.isArray()) {
   7759         // array case
   7760         declareArray(loc, identifier, type, symbol, !flattenVar);
   7761     } else {
   7762         // non-array case
   7763         if (symbol == nullptr)
   7764             symbol = declareNonArray(loc, identifier, type, !flattenVar);
   7765         else if (type != symbol->getType())
   7766             error(loc, "cannot change the type of", "redeclaration", symbol->getName().c_str());
   7767     }
   7768 
   7769     if (symbol == nullptr)
   7770         return nullptr;
   7771 
   7772     if (flattenVar)
   7773         flatten(*symbol->getAsVariable(), symbolTable.atGlobalLevel());
   7774 
   7775     if (initializer == nullptr)
   7776         return nullptr;
   7777 
   7778     // Deal with initializer
   7779     TVariable* variable = symbol->getAsVariable();
   7780     if (variable == nullptr) {
   7781         error(loc, "initializer requires a variable, not a member", identifier.c_str(), "");
   7782         return nullptr;
   7783     }
   7784     return executeInitializer(loc, initializer, variable);
   7785 }
   7786 
   7787 // Pick up global defaults from the provide global defaults into dst.
   7788 void HlslParseContext::inheritGlobalDefaults(TQualifier& dst) const
   7789 {
   7790     if (dst.storage == EvqVaryingOut) {
   7791         if (! dst.hasStream() && language == EShLangGeometry)
   7792             dst.layoutStream = globalOutputDefaults.layoutStream;
   7793         if (! dst.hasXfbBuffer())
   7794             dst.layoutXfbBuffer = globalOutputDefaults.layoutXfbBuffer;
   7795     }
   7796 }
   7797 
   7798 //
   7799 // Make an internal-only variable whose name is for debug purposes only
   7800 // and won't be searched for.  Callers will only use the return value to use
   7801 // the variable, not the name to look it up.  It is okay if the name
   7802 // is the same as other names; there won't be any conflict.
   7803 //
   7804 TVariable* HlslParseContext::makeInternalVariable(const char* name, const TType& type) const
   7805 {
   7806     TString* nameString = NewPoolTString(name);
   7807     TVariable* variable = new TVariable(nameString, type);
   7808     symbolTable.makeInternalVariable(*variable);
   7809 
   7810     return variable;
   7811 }
   7812 
   7813 // Make a symbol node holding a new internal temporary variable.
   7814 TIntermSymbol* HlslParseContext::makeInternalVariableNode(const TSourceLoc& loc, const char* name,
   7815                                                           const TType& type) const
   7816 {
   7817     TVariable* tmpVar = makeInternalVariable(name, type);
   7818     tmpVar->getWritableType().getQualifier().makeTemporary();
   7819 
   7820     return intermediate.addSymbol(*tmpVar, loc);
   7821 }
   7822 
   7823 //
   7824 // Declare a non-array variable, the main point being there is no redeclaration
   7825 // for resizing allowed.
   7826 //
   7827 // Return the successfully declared variable.
   7828 //
   7829 TVariable* HlslParseContext::declareNonArray(const TSourceLoc& loc, const TString& identifier, const TType& type,
   7830                                              bool track)
   7831 {
   7832     // make a new variable
   7833     TVariable* variable = new TVariable(&identifier, type);
   7834 
   7835     // add variable to symbol table
   7836     if (symbolTable.insert(*variable)) {
   7837         if (track && symbolTable.atGlobalLevel())
   7838             trackLinkage(*variable);
   7839         return variable;
   7840     }
   7841 
   7842     error(loc, "redefinition", variable->getName().c_str(), "");
   7843     return nullptr;
   7844 }
   7845 
   7846 //
   7847 // Handle all types of initializers from the grammar.
   7848 //
   7849 // Returning nullptr just means there is no code to execute to handle the
   7850 // initializer, which will, for example, be the case for constant initializers.
   7851 //
   7852 // Returns a subtree that accomplished the initialization.
   7853 //
   7854 TIntermNode* HlslParseContext::executeInitializer(const TSourceLoc& loc, TIntermTyped* initializer, TVariable* variable)
   7855 {
   7856     //
   7857     // Identifier must be of type constant, a global, or a temporary, and
   7858     // starting at version 120, desktop allows uniforms to have initializers.
   7859     //
   7860     TStorageQualifier qualifier = variable->getType().getQualifier().storage;
   7861 
   7862     //
   7863     // If the initializer was from braces { ... }, we convert the whole subtree to a
   7864     // constructor-style subtree, allowing the rest of the code to operate
   7865     // identically for both kinds of initializers.
   7866     //
   7867     //
   7868     // Type can't be deduced from the initializer list, so a skeletal type to
   7869     // follow has to be passed in.  Constness and specialization-constness
   7870     // should be deduced bottom up, not dictated by the skeletal type.
   7871     //
   7872     TType skeletalType;
   7873     skeletalType.shallowCopy(variable->getType());
   7874     skeletalType.getQualifier().makeTemporary();
   7875     if (initializer->getAsAggregate() && initializer->getAsAggregate()->getOp() == EOpNull)
   7876         initializer = convertInitializerList(loc, skeletalType, initializer, nullptr);
   7877     if (initializer == nullptr) {
   7878         // error recovery; don't leave const without constant values
   7879         if (qualifier == EvqConst)
   7880             variable->getWritableType().getQualifier().storage = EvqTemporary;
   7881         return nullptr;
   7882     }
   7883 
   7884     // Fix outer arrayness if variable is unsized, getting size from the initializer
   7885     if (initializer->getType().isSizedArray() && variable->getType().isUnsizedArray())
   7886         variable->getWritableType().changeOuterArraySize(initializer->getType().getOuterArraySize());
   7887 
   7888     // Inner arrayness can also get set by an initializer
   7889     if (initializer->getType().isArrayOfArrays() && variable->getType().isArrayOfArrays() &&
   7890         initializer->getType().getArraySizes()->getNumDims() ==
   7891         variable->getType().getArraySizes()->getNumDims()) {
   7892         // adopt unsized sizes from the initializer's sizes
   7893         for (int d = 1; d < variable->getType().getArraySizes()->getNumDims(); ++d) {
   7894             if (variable->getType().getArraySizes()->getDimSize(d) == UnsizedArraySize) {
   7895                 variable->getWritableType().getArraySizes()->setDimSize(d,
   7896                     initializer->getType().getArraySizes()->getDimSize(d));
   7897             }
   7898         }
   7899     }
   7900 
   7901     // Uniform and global consts require a constant initializer
   7902     if (qualifier == EvqUniform && initializer->getType().getQualifier().storage != EvqConst) {
   7903         error(loc, "uniform initializers must be constant", "=", "'%s'", variable->getType().getCompleteString().c_str());
   7904         variable->getWritableType().getQualifier().storage = EvqTemporary;
   7905         return nullptr;
   7906     }
   7907 
   7908     // Const variables require a constant initializer
   7909     if (qualifier == EvqConst) {
   7910         if (initializer->getType().getQualifier().storage != EvqConst) {
   7911             variable->getWritableType().getQualifier().storage = EvqConstReadOnly;
   7912             qualifier = EvqConstReadOnly;
   7913         }
   7914     }
   7915 
   7916     if (qualifier == EvqConst || qualifier == EvqUniform) {
   7917         // Compile-time tagging of the variable with its constant value...
   7918 
   7919         initializer = intermediate.addConversion(EOpAssign, variable->getType(), initializer);
   7920         if (initializer != nullptr && variable->getType() != initializer->getType())
   7921             initializer = intermediate.addUniShapeConversion(EOpAssign, variable->getType(), initializer);
   7922         if (initializer == nullptr || !initializer->getAsConstantUnion() ||
   7923                                       variable->getType() != initializer->getType()) {
   7924             error(loc, "non-matching or non-convertible constant type for const initializer",
   7925                 variable->getType().getStorageQualifierString(), "");
   7926             variable->getWritableType().getQualifier().storage = EvqTemporary;
   7927             return nullptr;
   7928         }
   7929 
   7930         variable->setConstArray(initializer->getAsConstantUnion()->getConstArray());
   7931     } else {
   7932         // normal assigning of a value to a variable...
   7933         specializationCheck(loc, initializer->getType(), "initializer");
   7934         TIntermSymbol* intermSymbol = intermediate.addSymbol(*variable, loc);
   7935         TIntermNode* initNode = handleAssign(loc, EOpAssign, intermSymbol, initializer);
   7936         if (initNode == nullptr)
   7937             assignError(loc, "=", intermSymbol->getCompleteString(), initializer->getCompleteString());
   7938         return initNode;
   7939     }
   7940 
   7941     return nullptr;
   7942 }
   7943 
   7944 //
   7945 // Reprocess any initializer-list { ... } parts of the initializer.
   7946 // Need to hierarchically assign correct types and implicit
   7947 // conversions. Will do this mimicking the same process used for
   7948 // creating a constructor-style initializer, ensuring we get the
   7949 // same form.
   7950 //
   7951 // Returns a node representing an expression for the initializer list expressed
   7952 // as the correct type.
   7953 //
   7954 // Returns nullptr if there is an error.
   7955 //
   7956 TIntermTyped* HlslParseContext::convertInitializerList(const TSourceLoc& loc, const TType& type,
   7957                                                        TIntermTyped* initializer, TIntermTyped* scalarInit)
   7958 {
   7959     // Will operate recursively.  Once a subtree is found that is constructor style,
   7960     // everything below it is already good: Only the "top part" of the initializer
   7961     // can be an initializer list, where "top part" can extend for several (or all) levels.
   7962 
   7963     // see if we have bottomed out in the tree within the initializer-list part
   7964     TIntermAggregate* initList = initializer->getAsAggregate();
   7965     if (initList == nullptr || initList->getOp() != EOpNull) {
   7966         // We don't have a list, but if it's a scalar and the 'type' is a
   7967         // composite, we need to lengthen below to make it useful.
   7968         // Otherwise, this is an already formed object to initialize with.
   7969         if (type.isScalar() || !initializer->getType().isScalar())
   7970             return initializer;
   7971         else
   7972             initList = intermediate.makeAggregate(initializer);
   7973     }
   7974 
   7975     // Of the initializer-list set of nodes, need to process bottom up,
   7976     // so recurse deep, then process on the way up.
   7977 
   7978     // Go down the tree here...
   7979     if (type.isArray()) {
   7980         // The type's array might be unsized, which could be okay, so base sizes on the size of the aggregate.
   7981         // Later on, initializer execution code will deal with array size logic.
   7982         TType arrayType;
   7983         arrayType.shallowCopy(type);                     // sharing struct stuff is fine
   7984         arrayType.copyArraySizes(*type.getArraySizes()); // but get a fresh copy of the array information, to edit below
   7985 
   7986         // edit array sizes to fill in unsized dimensions
   7987         if (type.isUnsizedArray())
   7988             arrayType.changeOuterArraySize((int)initList->getSequence().size());
   7989 
   7990         // set unsized array dimensions that can be derived from the initializer's first element
   7991         if (arrayType.isArrayOfArrays() && initList->getSequence().size() > 0) {
   7992             TIntermTyped* firstInit = initList->getSequence()[0]->getAsTyped();
   7993             if (firstInit->getType().isArray() &&
   7994                 arrayType.getArraySizes()->getNumDims() == firstInit->getType().getArraySizes()->getNumDims() + 1) {
   7995                 for (int d = 1; d < arrayType.getArraySizes()->getNumDims(); ++d) {
   7996                     if (arrayType.getArraySizes()->getDimSize(d) == UnsizedArraySize)
   7997                         arrayType.getArraySizes()->setDimSize(d, firstInit->getType().getArraySizes()->getDimSize(d - 1));
   7998                 }
   7999             }
   8000         }
   8001 
   8002         // lengthen list to be long enough
   8003         lengthenList(loc, initList->getSequence(), arrayType.getOuterArraySize(), scalarInit);
   8004 
   8005         // recursively process each element
   8006         TType elementType(arrayType, 0); // dereferenced type
   8007         for (int i = 0; i < arrayType.getOuterArraySize(); ++i) {
   8008             initList->getSequence()[i] = convertInitializerList(loc, elementType,
   8009                                                                 initList->getSequence()[i]->getAsTyped(), scalarInit);
   8010             if (initList->getSequence()[i] == nullptr)
   8011                 return nullptr;
   8012         }
   8013 
   8014         return addConstructor(loc, initList, arrayType);
   8015     } else if (type.isStruct()) {
   8016         // do we have implicit assignments to opaques?
   8017         for (size_t i = initList->getSequence().size(); i < type.getStruct()->size(); ++i) {
   8018             if ((*type.getStruct())[i].type->containsOpaque()) {
   8019                 error(loc, "cannot implicitly initialize opaque members", "initializer list", "");
   8020                 return nullptr;
   8021             }
   8022         }
   8023 
   8024         // lengthen list to be long enough
   8025         lengthenList(loc, initList->getSequence(), static_cast<int>(type.getStruct()->size()), scalarInit);
   8026 
   8027         if (type.getStruct()->size() != initList->getSequence().size()) {
   8028             error(loc, "wrong number of structure members", "initializer list", "");
   8029             return nullptr;
   8030         }
   8031         for (size_t i = 0; i < type.getStruct()->size(); ++i) {
   8032             initList->getSequence()[i] = convertInitializerList(loc, *(*type.getStruct())[i].type,
   8033                                                                 initList->getSequence()[i]->getAsTyped(), scalarInit);
   8034             if (initList->getSequence()[i] == nullptr)
   8035                 return nullptr;
   8036         }
   8037     } else if (type.isMatrix()) {
   8038         if (type.computeNumComponents() == (int)initList->getSequence().size()) {
   8039             // This means the matrix is initialized component-wise, rather than as
   8040             // a series of rows and columns.  We can just use the list directly as
   8041             // a constructor; no further processing needed.
   8042         } else {
   8043             // lengthen list to be long enough
   8044             lengthenList(loc, initList->getSequence(), type.getMatrixCols(), scalarInit);
   8045 
   8046             if (type.getMatrixCols() != (int)initList->getSequence().size()) {
   8047                 error(loc, "wrong number of matrix columns:", "initializer list", type.getCompleteString().c_str());
   8048                 return nullptr;
   8049             }
   8050             TType vectorType(type, 0); // dereferenced type
   8051             for (int i = 0; i < type.getMatrixCols(); ++i) {
   8052                 initList->getSequence()[i] = convertInitializerList(loc, vectorType,
   8053                                                                     initList->getSequence()[i]->getAsTyped(), scalarInit);
   8054                 if (initList->getSequence()[i] == nullptr)
   8055                     return nullptr;
   8056             }
   8057         }
   8058     } else if (type.isVector()) {
   8059         // lengthen list to be long enough
   8060         lengthenList(loc, initList->getSequence(), type.getVectorSize(), scalarInit);
   8061 
   8062         // error check; we're at bottom, so work is finished below
   8063         if (type.getVectorSize() != (int)initList->getSequence().size()) {
   8064             error(loc, "wrong vector size (or rows in a matrix column):", "initializer list",
   8065                   type.getCompleteString().c_str());
   8066             return nullptr;
   8067         }
   8068     } else if (type.isScalar()) {
   8069         // lengthen list to be long enough
   8070         lengthenList(loc, initList->getSequence(), 1, scalarInit);
   8071 
   8072         if ((int)initList->getSequence().size() != 1) {
   8073             error(loc, "scalar expected one element:", "initializer list", type.getCompleteString().c_str());
   8074             return nullptr;
   8075         }
   8076     } else {
   8077         error(loc, "unexpected initializer-list type:", "initializer list", type.getCompleteString().c_str());
   8078         return nullptr;
   8079     }
   8080 
   8081     // Now that the subtree is processed, process this node as if the
   8082     // initializer list is a set of arguments to a constructor.
   8083     TIntermTyped* emulatedConstructorArguments;
   8084     if (initList->getSequence().size() == 1)
   8085         emulatedConstructorArguments = initList->getSequence()[0]->getAsTyped();
   8086     else
   8087         emulatedConstructorArguments = initList;
   8088 
   8089     return addConstructor(loc, emulatedConstructorArguments, type);
   8090 }
   8091 
   8092 // Lengthen list to be long enough to cover any gap from the current list size
   8093 // to 'size'. If the list is longer, do nothing.
   8094 // The value to lengthen with is the default for short lists.
   8095 //
   8096 // By default, lists that are too short due to lack of initializers initialize to zero.
   8097 // Alternatively, it could be a scalar initializer for a structure. Both cases are handled,
   8098 // based on whether something is passed in as 'scalarInit'.
   8099 //
   8100 // 'scalarInit' must be safe to use each time this is called (no side effects replication).
   8101 //
   8102 void HlslParseContext::lengthenList(const TSourceLoc& loc, TIntermSequence& list, int size, TIntermTyped* scalarInit)
   8103 {
   8104     for (int c = (int)list.size(); c < size; ++c) {
   8105         if (scalarInit == nullptr)
   8106             list.push_back(intermediate.addConstantUnion(0, loc));
   8107         else
   8108             list.push_back(scalarInit);
   8109     }
   8110 }
   8111 
   8112 //
   8113 // Test for the correctness of the parameters passed to various constructor functions
   8114 // and also convert them to the right data type, if allowed and required.
   8115 //
   8116 // Returns nullptr for an error or the constructed node (aggregate or typed) for no error.
   8117 //
   8118 TIntermTyped* HlslParseContext::handleConstructor(const TSourceLoc& loc, TIntermTyped* node, const TType& type)
   8119 {
   8120     if (node == nullptr)
   8121         return nullptr;
   8122 
   8123     // Construct identical type
   8124     if (type == node->getType())
   8125         return node;
   8126 
   8127     // Handle the idiom "(struct type)<scalar value>"
   8128     if (type.isStruct() && isScalarConstructor(node)) {
   8129         // 'node' will almost always get used multiple times, so should not be used directly,
   8130         // it would create a DAG instead of a tree, which might be okay (would
   8131         // like to formalize that for constants and symbols), but if it has
   8132         // side effects, they would get executed multiple times, which is not okay.
   8133         if (node->getAsConstantUnion() == nullptr && node->getAsSymbolNode() == nullptr) {
   8134             TIntermAggregate* seq = intermediate.makeAggregate(loc);
   8135             TIntermSymbol* copy = makeInternalVariableNode(loc, "scalarCopy", node->getType());
   8136             seq = intermediate.growAggregate(seq, intermediate.addBinaryNode(EOpAssign, copy, node, loc));
   8137             seq = intermediate.growAggregate(seq, convertInitializerList(loc, type, intermediate.makeAggregate(loc), copy));
   8138             seq->setOp(EOpComma);
   8139             seq->setType(type);
   8140             return seq;
   8141         } else
   8142             return convertInitializerList(loc, type, intermediate.makeAggregate(loc), node);
   8143     }
   8144 
   8145     return addConstructor(loc, node, type);
   8146 }
   8147 
   8148 // Add a constructor, either from the grammar, or other programmatic reasons.
   8149 //
   8150 // 'node' is what to construct from.
   8151 // 'type' is what type to construct.
   8152 //
   8153 // Returns the constructed object.
   8154 // Return nullptr if it can't be done.
   8155 //
   8156 TIntermTyped* HlslParseContext::addConstructor(const TSourceLoc& loc, TIntermTyped* node, const TType& type)
   8157 {
   8158     TIntermAggregate* aggrNode = node->getAsAggregate();
   8159     TOperator op = intermediate.mapTypeToConstructorOp(type);
   8160 
   8161     if (op == EOpConstructTextureSampler)
   8162         return intermediate.setAggregateOperator(aggrNode, op, type, loc);
   8163 
   8164     TTypeList::const_iterator memberTypes;
   8165     if (op == EOpConstructStruct)
   8166         memberTypes = type.getStruct()->begin();
   8167 
   8168     TType elementType;
   8169     if (type.isArray()) {
   8170         TType dereferenced(type, 0);
   8171         elementType.shallowCopy(dereferenced);
   8172     } else
   8173         elementType.shallowCopy(type);
   8174 
   8175     bool singleArg;
   8176     if (aggrNode != nullptr) {
   8177         if (aggrNode->getOp() != EOpNull)
   8178             singleArg = true;
   8179         else
   8180             singleArg = false;
   8181     } else
   8182         singleArg = true;
   8183 
   8184     TIntermTyped *newNode;
   8185     if (singleArg) {
   8186         // Handle array -> array conversion
   8187         // Constructing an array of one type from an array of another type is allowed,
   8188         // assuming there are enough components available (semantic-checked earlier).
   8189         if (type.isArray() && node->isArray())
   8190             newNode = convertArray(node, type);
   8191 
   8192         // If structure constructor or array constructor is being called
   8193         // for only one parameter inside the aggregate, we need to call constructAggregate function once.
   8194         else if (type.isArray())
   8195             newNode = constructAggregate(node, elementType, 1, node->getLoc());
   8196         else if (op == EOpConstructStruct)
   8197             newNode = constructAggregate(node, *(*memberTypes).type, 1, node->getLoc());
   8198         else {
   8199             // shape conversion for matrix constructor from scalar.  HLSL semantics are: scalar
   8200             // is replicated into every element of the matrix (not just the diagnonal), so
   8201             // that is handled specially here.
   8202             if (type.isMatrix() && node->getType().isScalarOrVec1())
   8203                 node = intermediate.addShapeConversion(type, node);
   8204 
   8205             newNode = constructBuiltIn(type, op, node, node->getLoc(), false);
   8206         }
   8207 
   8208         if (newNode && (type.isArray() || op == EOpConstructStruct))
   8209             newNode = intermediate.setAggregateOperator(newNode, EOpConstructStruct, type, loc);
   8210 
   8211         return newNode;
   8212     }
   8213 
   8214     //
   8215     // Handle list of arguments.
   8216     //
   8217     TIntermSequence& sequenceVector = aggrNode->getSequence();    // Stores the information about the parameter to the constructor
   8218     // if the structure constructor contains more than one parameter, then construct
   8219     // each parameter
   8220 
   8221     int paramCount = 0;  // keeps a track of the constructor parameter number being checked
   8222 
   8223     // for each parameter to the constructor call, check to see if the right type is passed or convert them
   8224     // to the right type if possible (and allowed).
   8225     // for structure constructors, just check if the right type is passed, no conversion is allowed.
   8226 
   8227     for (TIntermSequence::iterator p = sequenceVector.begin();
   8228         p != sequenceVector.end(); p++, paramCount++) {
   8229         if (type.isArray())
   8230             newNode = constructAggregate(*p, elementType, paramCount + 1, node->getLoc());
   8231         else if (op == EOpConstructStruct)
   8232             newNode = constructAggregate(*p, *(memberTypes[paramCount]).type, paramCount + 1, node->getLoc());
   8233         else
   8234             newNode = constructBuiltIn(type, op, (*p)->getAsTyped(), node->getLoc(), true);
   8235 
   8236         if (newNode)
   8237             *p = newNode;
   8238         else
   8239             return nullptr;
   8240     }
   8241 
   8242     TIntermTyped* constructor = intermediate.setAggregateOperator(aggrNode, op, type, loc);
   8243 
   8244     return constructor;
   8245 }
   8246 
   8247 // Function for constructor implementation. Calls addUnaryMath with appropriate EOp value
   8248 // for the parameter to the constructor (passed to this function). Essentially, it converts
   8249 // the parameter types correctly. If a constructor expects an int (like ivec2) and is passed a
   8250 // float, then float is converted to int.
   8251 //
   8252 // Returns nullptr for an error or the constructed node.
   8253 //
   8254 TIntermTyped* HlslParseContext::constructBuiltIn(const TType& type, TOperator op, TIntermTyped* node,
   8255                                                  const TSourceLoc& loc, bool subset)
   8256 {
   8257     TIntermTyped* newNode;
   8258     TOperator basicOp;
   8259 
   8260     //
   8261     // First, convert types as needed.
   8262     //
   8263     switch (op) {
   8264     case EOpConstructF16Vec2:
   8265     case EOpConstructF16Vec3:
   8266     case EOpConstructF16Vec4:
   8267     case EOpConstructF16Mat2x2:
   8268     case EOpConstructF16Mat2x3:
   8269     case EOpConstructF16Mat2x4:
   8270     case EOpConstructF16Mat3x2:
   8271     case EOpConstructF16Mat3x3:
   8272     case EOpConstructF16Mat3x4:
   8273     case EOpConstructF16Mat4x2:
   8274     case EOpConstructF16Mat4x3:
   8275     case EOpConstructF16Mat4x4:
   8276     case EOpConstructFloat16:
   8277         basicOp = EOpConstructFloat16;
   8278         break;
   8279 
   8280     case EOpConstructVec2:
   8281     case EOpConstructVec3:
   8282     case EOpConstructVec4:
   8283     case EOpConstructMat2x2:
   8284     case EOpConstructMat2x3:
   8285     case EOpConstructMat2x4:
   8286     case EOpConstructMat3x2:
   8287     case EOpConstructMat3x3:
   8288     case EOpConstructMat3x4:
   8289     case EOpConstructMat4x2:
   8290     case EOpConstructMat4x3:
   8291     case EOpConstructMat4x4:
   8292     case EOpConstructFloat:
   8293         basicOp = EOpConstructFloat;
   8294         break;
   8295 
   8296     case EOpConstructDVec2:
   8297     case EOpConstructDVec3:
   8298     case EOpConstructDVec4:
   8299     case EOpConstructDMat2x2:
   8300     case EOpConstructDMat2x3:
   8301     case EOpConstructDMat2x4:
   8302     case EOpConstructDMat3x2:
   8303     case EOpConstructDMat3x3:
   8304     case EOpConstructDMat3x4:
   8305     case EOpConstructDMat4x2:
   8306     case EOpConstructDMat4x3:
   8307     case EOpConstructDMat4x4:
   8308     case EOpConstructDouble:
   8309         basicOp = EOpConstructDouble;
   8310         break;
   8311 
   8312     case EOpConstructI16Vec2:
   8313     case EOpConstructI16Vec3:
   8314     case EOpConstructI16Vec4:
   8315     case EOpConstructInt16:
   8316         basicOp = EOpConstructInt16;
   8317         break;
   8318 
   8319     case EOpConstructIVec2:
   8320     case EOpConstructIVec3:
   8321     case EOpConstructIVec4:
   8322     case EOpConstructIMat2x2:
   8323     case EOpConstructIMat2x3:
   8324     case EOpConstructIMat2x4:
   8325     case EOpConstructIMat3x2:
   8326     case EOpConstructIMat3x3:
   8327     case EOpConstructIMat3x4:
   8328     case EOpConstructIMat4x2:
   8329     case EOpConstructIMat4x3:
   8330     case EOpConstructIMat4x4:
   8331     case EOpConstructInt:
   8332         basicOp = EOpConstructInt;
   8333         break;
   8334 
   8335     case EOpConstructU16Vec2:
   8336     case EOpConstructU16Vec3:
   8337     case EOpConstructU16Vec4:
   8338     case EOpConstructUint16:
   8339         basicOp = EOpConstructUint16;
   8340         break;
   8341 
   8342     case EOpConstructUVec2:
   8343     case EOpConstructUVec3:
   8344     case EOpConstructUVec4:
   8345     case EOpConstructUMat2x2:
   8346     case EOpConstructUMat2x3:
   8347     case EOpConstructUMat2x4:
   8348     case EOpConstructUMat3x2:
   8349     case EOpConstructUMat3x3:
   8350     case EOpConstructUMat3x4:
   8351     case EOpConstructUMat4x2:
   8352     case EOpConstructUMat4x3:
   8353     case EOpConstructUMat4x4:
   8354     case EOpConstructUint:
   8355         basicOp = EOpConstructUint;
   8356         break;
   8357 
   8358     case EOpConstructBVec2:
   8359     case EOpConstructBVec3:
   8360     case EOpConstructBVec4:
   8361     case EOpConstructBMat2x2:
   8362     case EOpConstructBMat2x3:
   8363     case EOpConstructBMat2x4:
   8364     case EOpConstructBMat3x2:
   8365     case EOpConstructBMat3x3:
   8366     case EOpConstructBMat3x4:
   8367     case EOpConstructBMat4x2:
   8368     case EOpConstructBMat4x3:
   8369     case EOpConstructBMat4x4:
   8370     case EOpConstructBool:
   8371         basicOp = EOpConstructBool;
   8372         break;
   8373 
   8374     default:
   8375         error(loc, "unsupported construction", "", "");
   8376 
   8377         return nullptr;
   8378     }
   8379     newNode = intermediate.addUnaryMath(basicOp, node, node->getLoc());
   8380     if (newNode == nullptr) {
   8381         error(loc, "can't convert", "constructor", "");
   8382         return nullptr;
   8383     }
   8384 
   8385     //
   8386     // Now, if there still isn't an operation to do the construction, and we need one, add one.
   8387     //
   8388 
   8389     // Otherwise, skip out early.
   8390     if (subset || (newNode != node && newNode->getType() == type))
   8391         return newNode;
   8392 
   8393     // setAggregateOperator will insert a new node for the constructor, as needed.
   8394     return intermediate.setAggregateOperator(newNode, op, type, loc);
   8395 }
   8396 
   8397 // Convert the array in node to the requested type, which is also an array.
   8398 // Returns nullptr on failure, otherwise returns aggregate holding the list of
   8399 // elements needed to construct the array.
   8400 TIntermTyped* HlslParseContext::convertArray(TIntermTyped* node, const TType& type)
   8401 {
   8402     assert(node->isArray() && type.isArray());
   8403     if (node->getType().computeNumComponents() < type.computeNumComponents())
   8404         return nullptr;
   8405 
   8406     // TODO: write an argument replicator, for the case the argument should not be
   8407     // executed multiple times, yet multiple copies are needed.
   8408 
   8409     TIntermTyped* constructee = node->getAsTyped();
   8410     // track where we are in consuming the argument
   8411     int constructeeElement = 0;
   8412     int constructeeComponent = 0;
   8413 
   8414     // bump up to the next component to consume
   8415     const auto getNextComponent = [&]() {
   8416         TIntermTyped* component;
   8417         component = handleBracketDereference(node->getLoc(), constructee,
   8418                                              intermediate.addConstantUnion(constructeeElement, node->getLoc()));
   8419         if (component->isVector())
   8420             component = handleBracketDereference(node->getLoc(), component,
   8421                                                  intermediate.addConstantUnion(constructeeComponent, node->getLoc()));
   8422         // bump component pointer up
   8423         ++constructeeComponent;
   8424         if (constructeeComponent == constructee->getVectorSize()) {
   8425             constructeeComponent = 0;
   8426             ++constructeeElement;
   8427         }
   8428         return component;
   8429     };
   8430 
   8431     // make one subnode per constructed array element
   8432     TIntermAggregate* constructor = nullptr;
   8433     TType derefType(type, 0);
   8434     TType speculativeComponentType(derefType, 0);
   8435     TType* componentType = derefType.isVector() ? &speculativeComponentType : &derefType;
   8436     TOperator componentOp = intermediate.mapTypeToConstructorOp(*componentType);
   8437     TType crossType(node->getBasicType(), EvqTemporary, type.getVectorSize());
   8438     for (int e = 0; e < type.getOuterArraySize(); ++e) {
   8439         // construct an element
   8440         TIntermTyped* elementArg;
   8441         if (type.getVectorSize() == constructee->getVectorSize()) {
   8442             // same element shape
   8443             elementArg = handleBracketDereference(node->getLoc(), constructee,
   8444                                                   intermediate.addConstantUnion(e, node->getLoc()));
   8445         } else {
   8446             // mismatched element shapes
   8447             if (type.getVectorSize() == 1)
   8448                 elementArg = getNextComponent();
   8449             else {
   8450                 // make a vector
   8451                 TIntermAggregate* elementConstructee = nullptr;
   8452                 for (int c = 0; c < type.getVectorSize(); ++c)
   8453                     elementConstructee = intermediate.growAggregate(elementConstructee, getNextComponent());
   8454                 elementArg = addConstructor(node->getLoc(), elementConstructee, crossType);
   8455             }
   8456         }
   8457         // convert basic types
   8458         elementArg = intermediate.addConversion(componentOp, derefType, elementArg);
   8459         if (elementArg == nullptr)
   8460             return nullptr;
   8461         // combine with top-level constructor
   8462         constructor = intermediate.growAggregate(constructor, elementArg);
   8463     }
   8464 
   8465     return constructor;
   8466 }
   8467 
   8468 // This function tests for the type of the parameters to the structure or array constructor. Raises
   8469 // an error message if the expected type does not match the parameter passed to the constructor.
   8470 //
   8471 // Returns nullptr for an error or the input node itself if the expected and the given parameter types match.
   8472 //
   8473 TIntermTyped* HlslParseContext::constructAggregate(TIntermNode* node, const TType& type, int paramCount,
   8474                                                    const TSourceLoc& loc)
   8475 {
   8476     // Handle cases that map more 1:1 between constructor arguments and constructed.
   8477     TIntermTyped* converted = intermediate.addConversion(EOpConstructStruct, type, node->getAsTyped());
   8478     if (converted == nullptr || converted->getType() != type) {
   8479         error(loc, "", "constructor", "cannot convert parameter %d from '%s' to '%s'", paramCount,
   8480             node->getAsTyped()->getType().getCompleteString().c_str(), type.getCompleteString().c_str());
   8481 
   8482         return nullptr;
   8483     }
   8484 
   8485     return converted;
   8486 }
   8487 
   8488 //
   8489 // Do everything needed to add an interface block.
   8490 //
   8491 void HlslParseContext::declareBlock(const TSourceLoc& loc, TType& type, const TString* instanceName)
   8492 {
   8493     assert(type.getWritableStruct() != nullptr);
   8494 
   8495     // Clean up top-level decorations that don't belong.
   8496     switch (type.getQualifier().storage) {
   8497     case EvqUniform:
   8498     case EvqBuffer:
   8499         correctUniform(type.getQualifier());
   8500         break;
   8501     case EvqVaryingIn:
   8502         correctInput(type.getQualifier());
   8503         break;
   8504     case EvqVaryingOut:
   8505         correctOutput(type.getQualifier());
   8506         break;
   8507     default:
   8508         break;
   8509     }
   8510 
   8511     TTypeList& typeList = *type.getWritableStruct();
   8512     // fix and check for member storage qualifiers and types that don't belong within a block
   8513     for (unsigned int member = 0; member < typeList.size(); ++member) {
   8514         TType& memberType = *typeList[member].type;
   8515         TQualifier& memberQualifier = memberType.getQualifier();
   8516         const TSourceLoc& memberLoc = typeList[member].loc;
   8517         globalQualifierFix(memberLoc, memberQualifier);
   8518         memberQualifier.storage = type.getQualifier().storage;
   8519 
   8520         if (memberType.isStruct()) {
   8521             // clean up and pick up the right set of decorations
   8522             auto it = ioTypeMap.find(memberType.getStruct());
   8523             switch (type.getQualifier().storage) {
   8524             case EvqUniform:
   8525             case EvqBuffer:
   8526                 correctUniform(type.getQualifier());
   8527                 if (it != ioTypeMap.end() && it->second.uniform)
   8528                     memberType.setStruct(it->second.uniform);
   8529                 break;
   8530             case EvqVaryingIn:
   8531                 correctInput(type.getQualifier());
   8532                 if (it != ioTypeMap.end() && it->second.input)
   8533                     memberType.setStruct(it->second.input);
   8534                 break;
   8535             case EvqVaryingOut:
   8536                 correctOutput(type.getQualifier());
   8537                 if (it != ioTypeMap.end() && it->second.output)
   8538                     memberType.setStruct(it->second.output);
   8539                 break;
   8540             default:
   8541                 break;
   8542             }
   8543         }
   8544     }
   8545 
   8546     // Make default block qualification, and adjust the member qualifications
   8547 
   8548     TQualifier defaultQualification;
   8549     switch (type.getQualifier().storage) {
   8550     case EvqUniform:    defaultQualification = globalUniformDefaults;    break;
   8551     case EvqBuffer:     defaultQualification = globalBufferDefaults;     break;
   8552     case EvqVaryingIn:  defaultQualification = globalInputDefaults;      break;
   8553     case EvqVaryingOut: defaultQualification = globalOutputDefaults;     break;
   8554     default:            defaultQualification.clear();                    break;
   8555     }
   8556 
   8557     // Special case for "push_constant uniform", which has a default of std430,
   8558     // contrary to normal uniform defaults, and can't have a default tracked for it.
   8559     if (type.getQualifier().layoutPushConstant && ! type.getQualifier().hasPacking())
   8560         type.getQualifier().layoutPacking = ElpStd430;
   8561 
   8562     // fix and check for member layout qualifiers
   8563 
   8564     mergeObjectLayoutQualifiers(defaultQualification, type.getQualifier(), true);
   8565 
   8566     bool memberWithLocation = false;
   8567     bool memberWithoutLocation = false;
   8568     for (unsigned int member = 0; member < typeList.size(); ++member) {
   8569         TQualifier& memberQualifier = typeList[member].type->getQualifier();
   8570         const TSourceLoc& memberLoc = typeList[member].loc;
   8571         if (memberQualifier.hasStream()) {
   8572             if (defaultQualification.layoutStream != memberQualifier.layoutStream)
   8573                 error(memberLoc, "member cannot contradict block", "stream", "");
   8574         }
   8575 
   8576         // "This includes a block's inheritance of the
   8577         // current global default buffer, a block member's inheritance of the block's
   8578         // buffer, and the requirement that any *xfb_buffer* declared on a block
   8579         // member must match the buffer inherited from the block."
   8580         if (memberQualifier.hasXfbBuffer()) {
   8581             if (defaultQualification.layoutXfbBuffer != memberQualifier.layoutXfbBuffer)
   8582                 error(memberLoc, "member cannot contradict block (or what block inherited from global)", "xfb_buffer", "");
   8583         }
   8584 
   8585         if (memberQualifier.hasLocation()) {
   8586             switch (type.getQualifier().storage) {
   8587             case EvqVaryingIn:
   8588             case EvqVaryingOut:
   8589                 memberWithLocation = true;
   8590                 break;
   8591             default:
   8592                 break;
   8593             }
   8594         } else
   8595             memberWithoutLocation = true;
   8596 
   8597         TQualifier newMemberQualification = defaultQualification;
   8598         mergeQualifiers(newMemberQualification, memberQualifier);
   8599         memberQualifier = newMemberQualification;
   8600     }
   8601 
   8602     // Process the members
   8603     fixBlockLocations(loc, type.getQualifier(), typeList, memberWithLocation, memberWithoutLocation);
   8604     fixXfbOffsets(type.getQualifier(), typeList);
   8605     fixBlockUniformOffsets(type.getQualifier(), typeList);
   8606 
   8607     // reverse merge, so that currentBlockQualifier now has all layout information
   8608     // (can't use defaultQualification directly, it's missing other non-layout-default-class qualifiers)
   8609     mergeObjectLayoutQualifiers(type.getQualifier(), defaultQualification, true);
   8610 
   8611     //
   8612     // Build and add the interface block as a new type named 'blockName'
   8613     //
   8614 
   8615     // Use the instance name as the interface name if one exists, else the block name.
   8616     const TString& interfaceName = (instanceName && !instanceName->empty()) ? *instanceName : type.getTypeName();
   8617 
   8618     TType blockType(&typeList, interfaceName, type.getQualifier());
   8619     if (type.isArray())
   8620         blockType.transferArraySizes(type.getArraySizes());
   8621 
   8622     // Add the variable, as anonymous or named instanceName.
   8623     // Make an anonymous variable if no name was provided.
   8624     if (instanceName == nullptr)
   8625         instanceName = NewPoolTString("");
   8626 
   8627     TVariable& variable = *new TVariable(instanceName, blockType);
   8628     if (! symbolTable.insert(variable)) {
   8629         if (*instanceName == "")
   8630             error(loc, "nameless block contains a member that already has a name at global scope",
   8631                   "" /* blockName->c_str() */, "");
   8632         else
   8633             error(loc, "block instance name redefinition", variable.getName().c_str(), "");
   8634 
   8635         return;
   8636     }
   8637 
   8638     // Save it in the AST for linker use.
   8639     if (symbolTable.atGlobalLevel())
   8640         trackLinkage(variable);
   8641 }
   8642 
   8643 //
   8644 // "For a block, this process applies to the entire block, or until the first member
   8645 // is reached that has a location layout qualifier. When a block member is declared with a location
   8646 // qualifier, its location comes from that qualifier: The member's location qualifier overrides the block-level
   8647 // declaration. Subsequent members are again assigned consecutive locations, based on the newest location,
   8648 // until the next member declared with a location qualifier. The values used for locations do not have to be
   8649 // declared in increasing order."
   8650 void HlslParseContext::fixBlockLocations(const TSourceLoc& loc, TQualifier& qualifier, TTypeList& typeList, bool memberWithLocation, bool memberWithoutLocation)
   8651 {
   8652     // "If a block has no block-level location layout qualifier, it is required that either all or none of its members
   8653     // have a location layout qualifier, or a compile-time error results."
   8654     if (! qualifier.hasLocation() && memberWithLocation && memberWithoutLocation)
   8655         error(loc, "either the block needs a location, or all members need a location, or no members have a location", "location", "");
   8656     else {
   8657         if (memberWithLocation) {
   8658             // remove any block-level location and make it per *every* member
   8659             int nextLocation = 0;  // by the rule above, initial value is not relevant
   8660             if (qualifier.hasAnyLocation()) {
   8661                 nextLocation = qualifier.layoutLocation;
   8662                 qualifier.layoutLocation = TQualifier::layoutLocationEnd;
   8663                 if (qualifier.hasComponent()) {
   8664                     // "It is a compile-time error to apply the *component* qualifier to a ... block"
   8665                     error(loc, "cannot apply to a block", "component", "");
   8666                 }
   8667                 if (qualifier.hasIndex()) {
   8668                     error(loc, "cannot apply to a block", "index", "");
   8669                 }
   8670             }
   8671             for (unsigned int member = 0; member < typeList.size(); ++member) {
   8672                 TQualifier& memberQualifier = typeList[member].type->getQualifier();
   8673                 const TSourceLoc& memberLoc = typeList[member].loc;
   8674                 if (! memberQualifier.hasLocation()) {
   8675                     if (nextLocation >= (int)TQualifier::layoutLocationEnd)
   8676                         error(memberLoc, "location is too large", "location", "");
   8677                     memberQualifier.layoutLocation = nextLocation;
   8678                     memberQualifier.layoutComponent = 0;
   8679                 }
   8680                 nextLocation = memberQualifier.layoutLocation +
   8681                                intermediate.computeTypeLocationSize(*typeList[member].type, language);
   8682             }
   8683         }
   8684     }
   8685 }
   8686 
   8687 void HlslParseContext::fixXfbOffsets(TQualifier& qualifier, TTypeList& typeList)
   8688 {
   8689     // "If a block is qualified with xfb_offset, all its
   8690     // members are assigned transform feedback buffer offsets. If a block is not qualified with xfb_offset, any
   8691     // members of that block not qualified with an xfb_offset will not be assigned transform feedback buffer
   8692     // offsets."
   8693 
   8694     if (! qualifier.hasXfbBuffer() || ! qualifier.hasXfbOffset())
   8695         return;
   8696 
   8697     int nextOffset = qualifier.layoutXfbOffset;
   8698     for (unsigned int member = 0; member < typeList.size(); ++member) {
   8699         TQualifier& memberQualifier = typeList[member].type->getQualifier();
   8700         bool containsDouble = false;
   8701         int memberSize = intermediate.computeTypeXfbSize(*typeList[member].type, containsDouble);
   8702         // see if we need to auto-assign an offset to this member
   8703         if (! memberQualifier.hasXfbOffset()) {
   8704             // "if applied to an aggregate containing a double, the offset must also be a multiple of 8"
   8705             if (containsDouble)
   8706                 RoundToPow2(nextOffset, 8);
   8707             memberQualifier.layoutXfbOffset = nextOffset;
   8708         } else
   8709             nextOffset = memberQualifier.layoutXfbOffset;
   8710         nextOffset += memberSize;
   8711     }
   8712 
   8713     // The above gave all block members an offset, so we can take it off the block now,
   8714     // which will avoid double counting the offset usage.
   8715     qualifier.layoutXfbOffset = TQualifier::layoutXfbOffsetEnd;
   8716 }
   8717 
   8718 // Calculate and save the offset of each block member, using the recursively
   8719 // defined block offset rules and the user-provided offset and align.
   8720 //
   8721 // Also, compute and save the total size of the block. For the block's size, arrayness
   8722 // is not taken into account, as each element is backed by a separate buffer.
   8723 //
   8724 void HlslParseContext::fixBlockUniformOffsets(const TQualifier& qualifier, TTypeList& typeList)
   8725 {
   8726     if (! qualifier.isUniformOrBuffer())
   8727         return;
   8728     if (qualifier.layoutPacking != ElpStd140 && qualifier.layoutPacking != ElpStd430 && qualifier.layoutPacking != ElpScalar)
   8729         return;
   8730 
   8731     int offset = 0;
   8732     int memberSize;
   8733     for (unsigned int member = 0; member < typeList.size(); ++member) {
   8734         TQualifier& memberQualifier = typeList[member].type->getQualifier();
   8735         const TSourceLoc& memberLoc = typeList[member].loc;
   8736 
   8737         // "When align is applied to an array, it effects only the start of the array, not the array's internal stride."
   8738 
   8739         // modify just the children's view of matrix layout, if there is one for this member
   8740         TLayoutMatrix subMatrixLayout = typeList[member].type->getQualifier().layoutMatrix;
   8741         int dummyStride;
   8742         int memberAlignment = intermediate.getMemberAlignment(*typeList[member].type, memberSize, dummyStride,
   8743                                                               qualifier.layoutPacking,
   8744                                                               subMatrixLayout != ElmNone
   8745                                                                   ? subMatrixLayout == ElmRowMajor
   8746                                                                   : qualifier.layoutMatrix == ElmRowMajor);
   8747         if (memberQualifier.hasOffset()) {
   8748             // "The specified offset must be a multiple
   8749             // of the base alignment of the type of the block member it qualifies, or a compile-time error results."
   8750             if (! IsMultipleOfPow2(memberQualifier.layoutOffset, memberAlignment))
   8751                 error(memberLoc, "must be a multiple of the member's alignment", "offset", "");
   8752 
   8753             // "The offset qualifier forces the qualified member to start at or after the specified
   8754             // integral-constant expression, which will be its byte offset from the beginning of the buffer.
   8755             // "The actual offset of a member is computed as
   8756             // follows: If offset was declared, start with that offset, otherwise start with the next available offset."
   8757             offset = std::max(offset, memberQualifier.layoutOffset);
   8758         }
   8759 
   8760         // "The actual alignment of a member will be the greater of the specified align alignment and the standard
   8761         // (e.g., std140) base alignment for the member's type."
   8762         if (memberQualifier.hasAlign())
   8763             memberAlignment = std::max(memberAlignment, memberQualifier.layoutAlign);
   8764 
   8765         // "If the resulting offset is not a multiple of the actual alignment,
   8766         // increase it to the first offset that is a multiple of
   8767         // the actual alignment."
   8768         RoundToPow2(offset, memberAlignment);
   8769         typeList[member].type->getQualifier().layoutOffset = offset;
   8770         offset += memberSize;
   8771     }
   8772 }
   8773 
   8774 // For an identifier that is already declared, add more qualification to it.
   8775 void HlslParseContext::addQualifierToExisting(const TSourceLoc& loc, TQualifier qualifier, const TString& identifier)
   8776 {
   8777     TSymbol* symbol = symbolTable.find(identifier);
   8778     if (symbol == nullptr) {
   8779         error(loc, "identifier not previously declared", identifier.c_str(), "");
   8780         return;
   8781     }
   8782     if (symbol->getAsFunction()) {
   8783         error(loc, "cannot re-qualify a function name", identifier.c_str(), "");
   8784         return;
   8785     }
   8786 
   8787     if (qualifier.isAuxiliary() ||
   8788         qualifier.isMemory() ||
   8789         qualifier.isInterpolation() ||
   8790         qualifier.hasLayout() ||
   8791         qualifier.storage != EvqTemporary ||
   8792         qualifier.precision != EpqNone) {
   8793         error(loc, "cannot add storage, auxiliary, memory, interpolation, layout, or precision qualifier to an existing variable", identifier.c_str(), "");
   8794         return;
   8795     }
   8796 
   8797     // For read-only built-ins, add a new symbol for holding the modified qualifier.
   8798     // This will bring up an entire block, if a block type has to be modified (e.g., gl_Position inside a block)
   8799     if (symbol->isReadOnly())
   8800         symbol = symbolTable.copyUp(symbol);
   8801 
   8802     if (qualifier.invariant) {
   8803         if (intermediate.inIoAccessed(identifier))
   8804             error(loc, "cannot change qualification after use", "invariant", "");
   8805         symbol->getWritableType().getQualifier().invariant = true;
   8806     } else if (qualifier.noContraction) {
   8807         if (intermediate.inIoAccessed(identifier))
   8808             error(loc, "cannot change qualification after use", "precise", "");
   8809         symbol->getWritableType().getQualifier().noContraction = true;
   8810     } else if (qualifier.specConstant) {
   8811         symbol->getWritableType().getQualifier().makeSpecConstant();
   8812         if (qualifier.hasSpecConstantId())
   8813             symbol->getWritableType().getQualifier().layoutSpecConstantId = qualifier.layoutSpecConstantId;
   8814     } else
   8815         warn(loc, "unknown requalification", "", "");
   8816 }
   8817 
   8818 void HlslParseContext::addQualifierToExisting(const TSourceLoc& loc, TQualifier qualifier, TIdentifierList& identifiers)
   8819 {
   8820     for (unsigned int i = 0; i < identifiers.size(); ++i)
   8821         addQualifierToExisting(loc, qualifier, *identifiers[i]);
   8822 }
   8823 
   8824 //
   8825 // Update the intermediate for the given input geometry
   8826 //
   8827 bool HlslParseContext::handleInputGeometry(const TSourceLoc& loc, const TLayoutGeometry& geometry)
   8828 {
   8829     switch (geometry) {
   8830     case ElgPoints:             // fall through
   8831     case ElgLines:              // ...
   8832     case ElgTriangles:          // ...
   8833     case ElgLinesAdjacency:     // ...
   8834     case ElgTrianglesAdjacency: // ...
   8835         if (! intermediate.setInputPrimitive(geometry)) {
   8836             error(loc, "input primitive geometry redefinition", TQualifier::getGeometryString(geometry), "");
   8837             return false;
   8838         }
   8839         break;
   8840 
   8841     default:
   8842         error(loc, "cannot apply to 'in'", TQualifier::getGeometryString(geometry), "");
   8843         return false;
   8844     }
   8845 
   8846     return true;
   8847 }
   8848 
   8849 //
   8850 // Update the intermediate for the given output geometry
   8851 //
   8852 bool HlslParseContext::handleOutputGeometry(const TSourceLoc& loc, const TLayoutGeometry& geometry)
   8853 {
   8854     // If this is not a geometry shader, ignore.  It might be a mixed shader including several stages.
   8855     // Since that's an OK situation, return true for success.
   8856     if (language != EShLangGeometry)
   8857         return true;
   8858 
   8859     switch (geometry) {
   8860     case ElgPoints:
   8861     case ElgLineStrip:
   8862     case ElgTriangleStrip:
   8863         if (! intermediate.setOutputPrimitive(geometry)) {
   8864             error(loc, "output primitive geometry redefinition", TQualifier::getGeometryString(geometry), "");
   8865             return false;
   8866         }
   8867         break;
   8868     default:
   8869         error(loc, "cannot apply to 'out'", TQualifier::getGeometryString(geometry), "");
   8870         return false;
   8871     }
   8872 
   8873     return true;
   8874 }
   8875 
   8876 //
   8877 // Selection attributes
   8878 //
   8879 void HlslParseContext::handleSelectionAttributes(const TSourceLoc& loc, TIntermSelection* selection,
   8880     const TAttributes& attributes)
   8881 {
   8882     if (selection == nullptr)
   8883         return;
   8884 
   8885     for (auto it = attributes.begin(); it != attributes.end(); ++it) {
   8886         switch (it->name) {
   8887         case EatFlatten:
   8888             selection->setFlatten();
   8889             break;
   8890         case EatBranch:
   8891             selection->setDontFlatten();
   8892             break;
   8893         default:
   8894             warn(loc, "attribute does not apply to a selection", "", "");
   8895             break;
   8896         }
   8897     }
   8898 }
   8899 
   8900 //
   8901 // Switch attributes
   8902 //
   8903 void HlslParseContext::handleSwitchAttributes(const TSourceLoc& loc, TIntermSwitch* selection,
   8904     const TAttributes& attributes)
   8905 {
   8906     if (selection == nullptr)
   8907         return;
   8908 
   8909     for (auto it = attributes.begin(); it != attributes.end(); ++it) {
   8910         switch (it->name) {
   8911         case EatFlatten:
   8912             selection->setFlatten();
   8913             break;
   8914         case EatBranch:
   8915             selection->setDontFlatten();
   8916             break;
   8917         default:
   8918             warn(loc, "attribute does not apply to a switch", "", "");
   8919             break;
   8920         }
   8921     }
   8922 }
   8923 
   8924 //
   8925 // Loop attributes
   8926 //
   8927 void HlslParseContext::handleLoopAttributes(const TSourceLoc& loc, TIntermLoop* loop,
   8928     const TAttributes& attributes)
   8929 {
   8930     if (loop == nullptr)
   8931         return;
   8932 
   8933     for (auto it = attributes.begin(); it != attributes.end(); ++it) {
   8934         switch (it->name) {
   8935         case EatUnroll:
   8936             loop->setUnroll();
   8937             break;
   8938         case EatLoop:
   8939             loop->setDontUnroll();
   8940             break;
   8941         default:
   8942             warn(loc, "attribute does not apply to a loop", "", "");
   8943             break;
   8944         }
   8945     }
   8946 }
   8947 
   8948 //
   8949 // Updating default qualifier for the case of a declaration with just a qualifier,
   8950 // no type, block, or identifier.
   8951 //
   8952 void HlslParseContext::updateStandaloneQualifierDefaults(const TSourceLoc& loc, const TPublicType& publicType)
   8953 {
   8954     if (publicType.shaderQualifiers.vertices != TQualifier::layoutNotSet) {
   8955         assert(language == EShLangTessControl || language == EShLangGeometry);
   8956         // const char* id = (language == EShLangTessControl) ? "vertices" : "max_vertices";
   8957     }
   8958     if (publicType.shaderQualifiers.invocations != TQualifier::layoutNotSet) {
   8959         if (! intermediate.setInvocations(publicType.shaderQualifiers.invocations))
   8960             error(loc, "cannot change previously set layout value", "invocations", "");
   8961     }
   8962     if (publicType.shaderQualifiers.geometry != ElgNone) {
   8963         if (publicType.qualifier.storage == EvqVaryingIn) {
   8964             switch (publicType.shaderQualifiers.geometry) {
   8965             case ElgPoints:
   8966             case ElgLines:
   8967             case ElgLinesAdjacency:
   8968             case ElgTriangles:
   8969             case ElgTrianglesAdjacency:
   8970             case ElgQuads:
   8971             case ElgIsolines:
   8972                 break;
   8973             default:
   8974                 error(loc, "cannot apply to input", TQualifier::getGeometryString(publicType.shaderQualifiers.geometry),
   8975                       "");
   8976             }
   8977         } else if (publicType.qualifier.storage == EvqVaryingOut) {
   8978             handleOutputGeometry(loc, publicType.shaderQualifiers.geometry);
   8979         } else
   8980             error(loc, "cannot apply to:", TQualifier::getGeometryString(publicType.shaderQualifiers.geometry),
   8981                   GetStorageQualifierString(publicType.qualifier.storage));
   8982     }
   8983     if (publicType.shaderQualifiers.spacing != EvsNone)
   8984         intermediate.setVertexSpacing(publicType.shaderQualifiers.spacing);
   8985     if (publicType.shaderQualifiers.order != EvoNone)
   8986         intermediate.setVertexOrder(publicType.shaderQualifiers.order);
   8987     if (publicType.shaderQualifiers.pointMode)
   8988         intermediate.setPointMode();
   8989     for (int i = 0; i < 3; ++i) {
   8990         if (publicType.shaderQualifiers.localSize[i] > 1) {
   8991             int max = 0;
   8992             switch (i) {
   8993             case 0: max = resources.maxComputeWorkGroupSizeX; break;
   8994             case 1: max = resources.maxComputeWorkGroupSizeY; break;
   8995             case 2: max = resources.maxComputeWorkGroupSizeZ; break;
   8996             default: break;
   8997             }
   8998             if (intermediate.getLocalSize(i) > (unsigned int)max)
   8999                 error(loc, "too large; see gl_MaxComputeWorkGroupSize", "local_size", "");
   9000 
   9001             // Fix the existing constant gl_WorkGroupSize with this new information.
   9002             TVariable* workGroupSize = getEditableVariable("gl_WorkGroupSize");
   9003             workGroupSize->getWritableConstArray()[i].setUConst(intermediate.getLocalSize(i));
   9004         }
   9005         if (publicType.shaderQualifiers.localSizeSpecId[i] != TQualifier::layoutNotSet) {
   9006             intermediate.setLocalSizeSpecId(i, publicType.shaderQualifiers.localSizeSpecId[i]);
   9007             // Set the workgroup built-in variable as a specialization constant
   9008             TVariable* workGroupSize = getEditableVariable("gl_WorkGroupSize");
   9009             workGroupSize->getWritableType().getQualifier().specConstant = true;
   9010         }
   9011     }
   9012     if (publicType.shaderQualifiers.earlyFragmentTests)
   9013         intermediate.setEarlyFragmentTests();
   9014 
   9015     const TQualifier& qualifier = publicType.qualifier;
   9016 
   9017     switch (qualifier.storage) {
   9018     case EvqUniform:
   9019         if (qualifier.hasMatrix())
   9020             globalUniformDefaults.layoutMatrix = qualifier.layoutMatrix;
   9021         if (qualifier.hasPacking())
   9022             globalUniformDefaults.layoutPacking = qualifier.layoutPacking;
   9023         break;
   9024     case EvqBuffer:
   9025         if (qualifier.hasMatrix())
   9026             globalBufferDefaults.layoutMatrix = qualifier.layoutMatrix;
   9027         if (qualifier.hasPacking())
   9028             globalBufferDefaults.layoutPacking = qualifier.layoutPacking;
   9029         break;
   9030     case EvqVaryingIn:
   9031         break;
   9032     case EvqVaryingOut:
   9033         if (qualifier.hasStream())
   9034             globalOutputDefaults.layoutStream = qualifier.layoutStream;
   9035         if (qualifier.hasXfbBuffer())
   9036             globalOutputDefaults.layoutXfbBuffer = qualifier.layoutXfbBuffer;
   9037         if (globalOutputDefaults.hasXfbBuffer() && qualifier.hasXfbStride()) {
   9038             if (! intermediate.setXfbBufferStride(globalOutputDefaults.layoutXfbBuffer, qualifier.layoutXfbStride))
   9039                 error(loc, "all stride settings must match for xfb buffer", "xfb_stride", "%d",
   9040                       qualifier.layoutXfbBuffer);
   9041         }
   9042         break;
   9043     default:
   9044         error(loc, "default qualifier requires 'uniform', 'buffer', 'in', or 'out' storage qualification", "", "");
   9045         return;
   9046     }
   9047 }
   9048 
   9049 //
   9050 // Take the sequence of statements that has been built up since the last case/default,
   9051 // put it on the list of top-level nodes for the current (inner-most) switch statement,
   9052 // and follow that by the case/default we are on now.  (See switch topology comment on
   9053 // TIntermSwitch.)
   9054 //
   9055 void HlslParseContext::wrapupSwitchSubsequence(TIntermAggregate* statements, TIntermNode* branchNode)
   9056 {
   9057     TIntermSequence* switchSequence = switchSequenceStack.back();
   9058 
   9059     if (statements) {
   9060         statements->setOperator(EOpSequence);
   9061         switchSequence->push_back(statements);
   9062     }
   9063     if (branchNode) {
   9064         // check all previous cases for the same label (or both are 'default')
   9065         for (unsigned int s = 0; s < switchSequence->size(); ++s) {
   9066             TIntermBranch* prevBranch = (*switchSequence)[s]->getAsBranchNode();
   9067             if (prevBranch) {
   9068                 TIntermTyped* prevExpression = prevBranch->getExpression();
   9069                 TIntermTyped* newExpression = branchNode->getAsBranchNode()->getExpression();
   9070                 if (prevExpression == nullptr && newExpression == nullptr)
   9071                     error(branchNode->getLoc(), "duplicate label", "default", "");
   9072                 else if (prevExpression != nullptr &&
   9073                     newExpression != nullptr &&
   9074                     prevExpression->getAsConstantUnion() &&
   9075                     newExpression->getAsConstantUnion() &&
   9076                     prevExpression->getAsConstantUnion()->getConstArray()[0].getIConst() ==
   9077                     newExpression->getAsConstantUnion()->getConstArray()[0].getIConst())
   9078                     error(branchNode->getLoc(), "duplicated value", "case", "");
   9079             }
   9080         }
   9081         switchSequence->push_back(branchNode);
   9082     }
   9083 }
   9084 
   9085 //
   9086 // Turn the top-level node sequence built up of wrapupSwitchSubsequence
   9087 // into a switch node.
   9088 //
   9089 TIntermNode* HlslParseContext::addSwitch(const TSourceLoc& loc, TIntermTyped* expression,
   9090                                          TIntermAggregate* lastStatements, const TAttributes& attributes)
   9091 {
   9092     wrapupSwitchSubsequence(lastStatements, nullptr);
   9093 
   9094     if (expression == nullptr ||
   9095         (expression->getBasicType() != EbtInt && expression->getBasicType() != EbtUint) ||
   9096         expression->getType().isArray() || expression->getType().isMatrix() || expression->getType().isVector())
   9097         error(loc, "condition must be a scalar integer expression", "switch", "");
   9098 
   9099     // If there is nothing to do, drop the switch but still execute the expression
   9100     TIntermSequence* switchSequence = switchSequenceStack.back();
   9101     if (switchSequence->size() == 0)
   9102         return expression;
   9103 
   9104     if (lastStatements == nullptr) {
   9105         // emulate a break for error recovery
   9106         lastStatements = intermediate.makeAggregate(intermediate.addBranch(EOpBreak, loc));
   9107         lastStatements->setOperator(EOpSequence);
   9108         switchSequence->push_back(lastStatements);
   9109     }
   9110 
   9111     TIntermAggregate* body = new TIntermAggregate(EOpSequence);
   9112     body->getSequence() = *switchSequenceStack.back();
   9113     body->setLoc(loc);
   9114 
   9115     TIntermSwitch* switchNode = new TIntermSwitch(expression, body);
   9116     switchNode->setLoc(loc);
   9117     handleSwitchAttributes(loc, switchNode, attributes);
   9118 
   9119     return switchNode;
   9120 }
   9121 
   9122 // Make a new symbol-table level that is made out of the members of a structure.
   9123 // This should be done as an anonymous struct (name is "") so that the symbol table
   9124 // finds the members with no explicit reference to a 'this' variable.
   9125 void HlslParseContext::pushThisScope(const TType& thisStruct, const TVector<TFunctionDeclarator>& functionDeclarators)
   9126 {
   9127     // member variables
   9128     TVariable& thisVariable = *new TVariable(NewPoolTString(""), thisStruct);
   9129     symbolTable.pushThis(thisVariable);
   9130 
   9131     // member functions
   9132     for (auto it = functionDeclarators.begin(); it != functionDeclarators.end(); ++it) {
   9133         // member should have a prefix matching currentTypePrefix.back()
   9134         // but, symbol lookup within the class scope will just use the
   9135         // unprefixed name. Hence, there are two: one fully prefixed and
   9136         // one with no prefix.
   9137         TFunction& member = *it->function->clone();
   9138         member.removePrefix(currentTypePrefix.back());
   9139         symbolTable.insert(member);
   9140     }
   9141 }
   9142 
   9143 // Track levels of class/struct/namespace nesting with a prefix string using
   9144 // the type names separated by the scoping operator. E.g., two levels
   9145 // would look like:
   9146 //
   9147 //   outer::inner
   9148 //
   9149 // The string is empty when at normal global level.
   9150 //
   9151 void HlslParseContext::pushNamespace(const TString& typeName)
   9152 {
   9153     // make new type prefix
   9154     TString newPrefix;
   9155     if (currentTypePrefix.size() > 0)
   9156         newPrefix = currentTypePrefix.back();
   9157     newPrefix.append(typeName);
   9158     newPrefix.append(scopeMangler);
   9159     currentTypePrefix.push_back(newPrefix);
   9160 }
   9161 
   9162 // Opposite of pushNamespace(), see above
   9163 void HlslParseContext::popNamespace()
   9164 {
   9165     currentTypePrefix.pop_back();
   9166 }
   9167 
   9168 // Use the class/struct nesting string to create a global name for
   9169 // a member of a class/struct.
   9170 void HlslParseContext::getFullNamespaceName(TString*& name) const
   9171 {
   9172     if (currentTypePrefix.size() == 0)
   9173         return;
   9174 
   9175     TString* fullName = NewPoolTString(currentTypePrefix.back().c_str());
   9176     fullName->append(*name);
   9177     name = fullName;
   9178 }
   9179 
   9180 // Helper function to add the namespace scope mangling syntax to a string.
   9181 void HlslParseContext::addScopeMangler(TString& name)
   9182 {
   9183     name.append(scopeMangler);
   9184 }
   9185 
   9186 // Return true if this has uniform-interface like decorations.
   9187 bool HlslParseContext::hasUniform(const TQualifier& qualifier) const
   9188 {
   9189     return qualifier.hasUniformLayout() ||
   9190            qualifier.layoutPushConstant;
   9191 }
   9192 
   9193 // Potentially not the opposite of hasUniform(), as if some characteristic is
   9194 // ever used for more than one thing (e.g., uniform or input), hasUniform() should
   9195 // say it exists, but clearUniform() should leave it in place.
   9196 void HlslParseContext::clearUniform(TQualifier& qualifier)
   9197 {
   9198     qualifier.clearUniformLayout();
   9199     qualifier.layoutPushConstant = false;
   9200 }
   9201 
   9202 // Return false if builtIn by itself doesn't force this qualifier to be an input qualifier.
   9203 bool HlslParseContext::isInputBuiltIn(const TQualifier& qualifier) const
   9204 {
   9205     switch (qualifier.builtIn) {
   9206     case EbvPosition:
   9207     case EbvPointSize:
   9208         return language != EShLangVertex && language != EShLangCompute && language != EShLangFragment;
   9209     case EbvClipDistance:
   9210     case EbvCullDistance:
   9211         return language != EShLangVertex && language != EShLangCompute;
   9212     case EbvFragCoord:
   9213     case EbvFace:
   9214     case EbvHelperInvocation:
   9215     case EbvLayer:
   9216     case EbvPointCoord:
   9217     case EbvSampleId:
   9218     case EbvSampleMask:
   9219     case EbvSamplePosition:
   9220     case EbvViewportIndex:
   9221         return language == EShLangFragment;
   9222     case EbvGlobalInvocationId:
   9223     case EbvLocalInvocationIndex:
   9224     case EbvLocalInvocationId:
   9225     case EbvNumWorkGroups:
   9226     case EbvWorkGroupId:
   9227     case EbvWorkGroupSize:
   9228         return language == EShLangCompute;
   9229     case EbvInvocationId:
   9230         return language == EShLangTessControl || language == EShLangTessEvaluation || language == EShLangGeometry;
   9231     case EbvPatchVertices:
   9232         return language == EShLangTessControl || language == EShLangTessEvaluation;
   9233     case EbvInstanceId:
   9234     case EbvInstanceIndex:
   9235     case EbvVertexId:
   9236     case EbvVertexIndex:
   9237         return language == EShLangVertex;
   9238     case EbvPrimitiveId:
   9239         return language == EShLangGeometry || language == EShLangFragment || language == EShLangTessControl;
   9240     case EbvTessLevelInner:
   9241     case EbvTessLevelOuter:
   9242         return language == EShLangTessEvaluation;
   9243     case EbvTessCoord:
   9244         return language == EShLangTessEvaluation;
   9245     default:
   9246         return false;
   9247     }
   9248 }
   9249 
   9250 // Return true if there are decorations to preserve for input-like storage.
   9251 bool HlslParseContext::hasInput(const TQualifier& qualifier) const
   9252 {
   9253     if (qualifier.hasAnyLocation())
   9254         return true;
   9255 
   9256     if (language == EShLangFragment && (qualifier.isInterpolation() || qualifier.centroid || qualifier.sample))
   9257         return true;
   9258 
   9259     if (language == EShLangTessEvaluation && qualifier.patch)
   9260         return true;
   9261 
   9262     if (isInputBuiltIn(qualifier))
   9263         return true;
   9264 
   9265     return false;
   9266 }
   9267 
   9268 // Return false if builtIn by itself doesn't force this qualifier to be an output qualifier.
   9269 bool HlslParseContext::isOutputBuiltIn(const TQualifier& qualifier) const
   9270 {
   9271     switch (qualifier.builtIn) {
   9272     case EbvPosition:
   9273     case EbvPointSize:
   9274     case EbvClipVertex:
   9275     case EbvClipDistance:
   9276     case EbvCullDistance:
   9277         return language != EShLangFragment && language != EShLangCompute;
   9278     case EbvFragDepth:
   9279     case EbvFragDepthGreater:
   9280     case EbvFragDepthLesser:
   9281     case EbvSampleMask:
   9282         return language == EShLangFragment;
   9283     case EbvLayer:
   9284     case EbvViewportIndex:
   9285         return language == EShLangGeometry || language == EShLangVertex;
   9286     case EbvPrimitiveId:
   9287         return language == EShLangGeometry;
   9288     case EbvTessLevelInner:
   9289     case EbvTessLevelOuter:
   9290         return language == EShLangTessControl;
   9291     default:
   9292         return false;
   9293     }
   9294 }
   9295 
   9296 // Return true if there are decorations to preserve for output-like storage.
   9297 bool HlslParseContext::hasOutput(const TQualifier& qualifier) const
   9298 {
   9299     if (qualifier.hasAnyLocation())
   9300         return true;
   9301 
   9302     if (language != EShLangFragment && language != EShLangCompute && qualifier.hasXfb())
   9303         return true;
   9304 
   9305     if (language == EShLangTessControl && qualifier.patch)
   9306         return true;
   9307 
   9308     if (language == EShLangGeometry && qualifier.hasStream())
   9309         return true;
   9310 
   9311     if (isOutputBuiltIn(qualifier))
   9312         return true;
   9313 
   9314     return false;
   9315 }
   9316 
   9317 // Make the IO decorations etc. be appropriate only for an input interface.
   9318 void HlslParseContext::correctInput(TQualifier& qualifier)
   9319 {
   9320     clearUniform(qualifier);
   9321     if (language == EShLangVertex)
   9322         qualifier.clearInterstage();
   9323     if (language != EShLangTessEvaluation)
   9324         qualifier.patch = false;
   9325     if (language != EShLangFragment) {
   9326         qualifier.clearInterpolation();
   9327         qualifier.sample = false;
   9328     }
   9329 
   9330     qualifier.clearStreamLayout();
   9331     qualifier.clearXfbLayout();
   9332 
   9333     if (! isInputBuiltIn(qualifier))
   9334         qualifier.builtIn = EbvNone;
   9335 }
   9336 
   9337 // Make the IO decorations etc. be appropriate only for an output interface.
   9338 void HlslParseContext::correctOutput(TQualifier& qualifier)
   9339 {
   9340     clearUniform(qualifier);
   9341     if (language == EShLangFragment)
   9342         qualifier.clearInterstage();
   9343     if (language != EShLangGeometry)
   9344         qualifier.clearStreamLayout();
   9345     if (language == EShLangFragment)
   9346         qualifier.clearXfbLayout();
   9347     if (language != EShLangTessControl)
   9348         qualifier.patch = false;
   9349 
   9350     switch (qualifier.builtIn) {
   9351     case EbvFragDepth:
   9352         intermediate.setDepthReplacing();
   9353         intermediate.setDepth(EldAny);
   9354         break;
   9355     case EbvFragDepthGreater:
   9356         intermediate.setDepthReplacing();
   9357         intermediate.setDepth(EldGreater);
   9358         qualifier.builtIn = EbvFragDepth;
   9359         break;
   9360     case EbvFragDepthLesser:
   9361         intermediate.setDepthReplacing();
   9362         intermediate.setDepth(EldLess);
   9363         qualifier.builtIn = EbvFragDepth;
   9364         break;
   9365     default:
   9366         break;
   9367     }
   9368 
   9369     if (! isOutputBuiltIn(qualifier))
   9370         qualifier.builtIn = EbvNone;
   9371 }
   9372 
   9373 // Make the IO decorations etc. be appropriate only for uniform type interfaces.
   9374 void HlslParseContext::correctUniform(TQualifier& qualifier)
   9375 {
   9376     if (qualifier.declaredBuiltIn == EbvNone)
   9377         qualifier.declaredBuiltIn = qualifier.builtIn;
   9378 
   9379     qualifier.builtIn = EbvNone;
   9380     qualifier.clearInterstage();
   9381     qualifier.clearInterstageLayout();
   9382 }
   9383 
   9384 // Clear out all IO/Uniform stuff, so this has nothing to do with being an IO interface.
   9385 void HlslParseContext::clearUniformInputOutput(TQualifier& qualifier)
   9386 {
   9387     clearUniform(qualifier);
   9388     correctUniform(qualifier);
   9389 }
   9390 
   9391 
   9392 // Set texture return type.  Returns success (not all types are valid).
   9393 bool HlslParseContext::setTextureReturnType(TSampler& sampler, const TType& retType, const TSourceLoc& loc)
   9394 {
   9395     // Seed the output with an invalid index.  We will set it to a valid one if we can.
   9396     sampler.structReturnIndex = TSampler::noReturnStruct;
   9397 
   9398     // Arrays aren't supported.
   9399     if (retType.isArray()) {
   9400         error(loc, "Arrays not supported in texture template types", "", "");
   9401         return false;
   9402     }
   9403 
   9404     // If return type is a vector, remember the vector size in the sampler, and return.
   9405     if (retType.isVector() || retType.isScalar()) {
   9406         sampler.vectorSize = retType.getVectorSize();
   9407         return true;
   9408     }
   9409 
   9410     // If it wasn't a vector, it must be a struct meeting certain requirements.  The requirements
   9411     // are checked below: just check for struct-ness here.
   9412     if (!retType.isStruct()) {
   9413         error(loc, "Invalid texture template type", "", "");
   9414         return false;
   9415     }
   9416 
   9417     // TODO: Subpass doesn't handle struct returns, due to some oddities with fn overloading.
   9418     if (sampler.isSubpass()) {
   9419         error(loc, "Unimplemented: structure template type in subpass input", "", "");
   9420         return false;
   9421     }
   9422 
   9423     TTypeList* members = retType.getWritableStruct();
   9424 
   9425     // Check for too many or not enough structure members.
   9426     if (members->size() > 4 || members->size() == 0) {
   9427         error(loc, "Invalid member count in texture template structure", "", "");
   9428         return false;
   9429     }
   9430 
   9431     // Error checking: We must have <= 4 total components, all of the same basic type.
   9432     unsigned totalComponents = 0;
   9433     for (unsigned m = 0; m < members->size(); ++m) {
   9434         // Check for bad member types
   9435         if (!(*members)[m].type->isScalar() && !(*members)[m].type->isVector()) {
   9436             error(loc, "Invalid texture template struct member type", "", "");
   9437             return false;
   9438         }
   9439 
   9440         const unsigned memberVectorSize = (*members)[m].type->getVectorSize();
   9441         totalComponents += memberVectorSize;
   9442 
   9443         // too many total member components
   9444         if (totalComponents > 4) {
   9445             error(loc, "Too many components in texture template structure type", "", "");
   9446             return false;
   9447         }
   9448 
   9449         // All members must be of a common basic type
   9450         if ((*members)[m].type->getBasicType() != (*members)[0].type->getBasicType()) {
   9451             error(loc, "Texture template structure members must same basic type", "", "");
   9452             return false;
   9453         }
   9454     }
   9455 
   9456     // If the structure in the return type already exists in the table, we'll use it.  Otherwise, we'll make
   9457     // a new entry.  This is a linear search, but it hardly ever happens, and the list cannot be very large.
   9458     for (unsigned int idx = 0; idx < textureReturnStruct.size(); ++idx) {
   9459         if (textureReturnStruct[idx] == members) {
   9460             sampler.structReturnIndex = idx;
   9461             return true;
   9462         }
   9463     }
   9464 
   9465     // It wasn't found as an existing entry.  See if we have room for a new one.
   9466     if (textureReturnStruct.size() >= TSampler::structReturnSlots) {
   9467         error(loc, "Texture template struct return slots exceeded", "", "");
   9468         return false;
   9469     }
   9470 
   9471     // Insert it in the vector that tracks struct return types.
   9472     sampler.structReturnIndex = unsigned(textureReturnStruct.size());
   9473     textureReturnStruct.push_back(members);
   9474 
   9475     // Success!
   9476     return true;
   9477 }
   9478 
   9479 // Return the sampler return type in retType.
   9480 void HlslParseContext::getTextureReturnType(const TSampler& sampler, TType& retType) const
   9481 {
   9482     if (sampler.hasReturnStruct()) {
   9483         assert(textureReturnStruct.size() >= sampler.structReturnIndex);
   9484 
   9485         // We land here if the texture return is a structure.
   9486         TTypeList* blockStruct = textureReturnStruct[sampler.structReturnIndex];
   9487 
   9488         const TType resultType(blockStruct, "");
   9489         retType.shallowCopy(resultType);
   9490     } else {
   9491         // We land here if the texture return is a vector or scalar.
   9492         const TType resultType(sampler.type, EvqTemporary, sampler.getVectorSize());
   9493         retType.shallowCopy(resultType);
   9494     }
   9495 }
   9496 
   9497 
   9498 // Return a symbol for the tessellation linkage variable of the given TBuiltInVariable type
   9499 TIntermSymbol* HlslParseContext::findTessLinkageSymbol(TBuiltInVariable biType) const
   9500 {
   9501     const auto it = builtInTessLinkageSymbols.find(biType);
   9502     if (it == builtInTessLinkageSymbols.end())  // if it wasn't declared by the user, return nullptr
   9503         return nullptr;
   9504 
   9505     return intermediate.addSymbol(*it->second->getAsVariable());
   9506 }
   9507 
   9508 // Find the patch constant function (issues error, returns nullptr if not found)
   9509 const TFunction* HlslParseContext::findPatchConstantFunction(const TSourceLoc& loc)
   9510 {
   9511     if (symbolTable.isFunctionNameVariable(patchConstantFunctionName)) {
   9512         error(loc, "can't use variable in patch constant function", patchConstantFunctionName.c_str(), "");
   9513         return nullptr;
   9514     }
   9515 
   9516     const TString mangledName = patchConstantFunctionName + "(";
   9517 
   9518     // create list of PCF candidates
   9519     TVector<const TFunction*> candidateList;
   9520     bool builtIn;
   9521     symbolTable.findFunctionNameList(mangledName, candidateList, builtIn);
   9522 
   9523     // We have to have one and only one, or we don't know which to pick: the patchconstantfunc does not
   9524     // allow any disambiguation of overloads.
   9525     if (candidateList.empty()) {
   9526         error(loc, "patch constant function not found", patchConstantFunctionName.c_str(), "");
   9527         return nullptr;
   9528     }
   9529 
   9530     // Based on directed experiments, it appears that if there are overloaded patchconstantfunctions,
   9531     // HLSL picks the last one in shader source order.  Since that isn't yet implemented here, error
   9532     // out if there is more than one candidate.
   9533     if (candidateList.size() > 1) {
   9534         error(loc, "ambiguous patch constant function", patchConstantFunctionName.c_str(), "");
   9535         return nullptr;
   9536     }
   9537 
   9538     return candidateList[0];
   9539 }
   9540 
   9541 // Finalization step: Add patch constant function invocation
   9542 void HlslParseContext::addPatchConstantInvocation()
   9543 {
   9544     TSourceLoc loc;
   9545     loc.init();
   9546 
   9547     // If there's no patch constant function, or we're not a HS, do nothing.
   9548     if (patchConstantFunctionName.empty() || language != EShLangTessControl)
   9549         return;
   9550 
   9551     // Look for built-in variables in a function's parameter list.
   9552     const auto findBuiltIns = [&](const TFunction& function, std::set<tInterstageIoData>& builtIns) {
   9553         for (int p=0; p<function.getParamCount(); ++p) {
   9554             TStorageQualifier storage = function[p].type->getQualifier().storage;
   9555 
   9556             if (storage == EvqConstReadOnly) // treated identically to input
   9557                 storage = EvqIn;
   9558 
   9559             if (function[p].getDeclaredBuiltIn() != EbvNone)
   9560                 builtIns.insert(HlslParseContext::tInterstageIoData(function[p].getDeclaredBuiltIn(), storage));
   9561             else
   9562                 builtIns.insert(HlslParseContext::tInterstageIoData(function[p].type->getQualifier().builtIn, storage));
   9563         }
   9564     };
   9565 
   9566     // If we synthesize a built-in interface variable, we must add it to the linkage.
   9567     const auto addToLinkage = [&](const TType& type, const TString* name, TIntermSymbol** symbolNode) {
   9568         if (name == nullptr) {
   9569             error(loc, "unable to locate patch function parameter name", "", "");
   9570             return;
   9571         } else {
   9572             TVariable& variable = *new TVariable(name, type);
   9573             if (! symbolTable.insert(variable)) {
   9574                 error(loc, "unable to declare patch constant function interface variable", name->c_str(), "");
   9575                 return;
   9576             }
   9577 
   9578             globalQualifierFix(loc, variable.getWritableType().getQualifier());
   9579 
   9580             if (symbolNode != nullptr)
   9581                 *symbolNode = intermediate.addSymbol(variable);
   9582 
   9583             trackLinkage(variable);
   9584         }
   9585     };
   9586 
   9587     const auto isOutputPatch = [](TFunction& patchConstantFunction, int param) {
   9588         const TType& type = *patchConstantFunction[param].type;
   9589         const TBuiltInVariable biType = patchConstantFunction[param].getDeclaredBuiltIn();
   9590 
   9591         return type.isSizedArray() && biType == EbvOutputPatch;
   9592     };
   9593 
   9594     // We will perform these steps.  Each is in a scoped block for separation: they could
   9595     // become separate functions to make addPatchConstantInvocation shorter.
   9596     //
   9597     // 1. Union the interfaces, and create built-ins for anything present in the PCF and
   9598     //    declared as a built-in variable that isn't present in the entry point's signature.
   9599     //
   9600     // 2. Synthesizes a call to the patchconstfunction using built-in variables from either main,
   9601     //    or the ones we created.  Matching is based on built-in type.  We may use synthesized
   9602     //    variables from (1) above.
   9603     //
   9604     // 2B: Synthesize per control point invocations of wrapped entry point if the PCF requires them.
   9605     //
   9606     // 3. Create a return sequence: copy the return value (if any) from the PCF to a
   9607     //    (non-sanitized) output variable.  In case this may involve multiple copies, such as for
   9608     //    an arrayed variable, a temporary copy of the PCF output is created to avoid multiple
   9609     //    indirections into a complex R-value coming from the call to the PCF.
   9610     //
   9611     // 4. Create a barrier.
   9612     //
   9613     // 5/5B. Call the PCF inside an if test for (invocation id == 0).
   9614 
   9615     TFunction* patchConstantFunctionPtr = const_cast<TFunction*>(findPatchConstantFunction(loc));
   9616 
   9617     if (patchConstantFunctionPtr == nullptr)
   9618         return;
   9619 
   9620     TFunction& patchConstantFunction = *patchConstantFunctionPtr;
   9621 
   9622     const int pcfParamCount = patchConstantFunction.getParamCount();
   9623     TIntermSymbol* invocationIdSym = findTessLinkageSymbol(EbvInvocationId);
   9624     TIntermSequence& epBodySeq = entryPointFunctionBody->getAsAggregate()->getSequence();
   9625 
   9626     int outPatchParam = -1; // -1 means there isn't one.
   9627 
   9628     // ================ Step 1A: Union Interfaces ================
   9629     // Our patch constant function.
   9630     {
   9631         std::set<tInterstageIoData> pcfBuiltIns;  // patch constant function built-ins
   9632         std::set<tInterstageIoData> epfBuiltIns;  // entry point function built-ins
   9633 
   9634         assert(entryPointFunction);
   9635         assert(entryPointFunctionBody);
   9636 
   9637         findBuiltIns(patchConstantFunction, pcfBuiltIns);
   9638         findBuiltIns(*entryPointFunction,   epfBuiltIns);
   9639 
   9640         // Find the set of built-ins in the PCF that are not present in the entry point.
   9641         std::set<tInterstageIoData> notInEntryPoint;
   9642 
   9643         notInEntryPoint = pcfBuiltIns;
   9644 
   9645         // std::set_difference not usable on unordered containers
   9646         for (auto bi = epfBuiltIns.begin(); bi != epfBuiltIns.end(); ++bi)
   9647             notInEntryPoint.erase(*bi);
   9648 
   9649         // Now we'll add those to the entry and to the linkage.
   9650         for (int p=0; p<pcfParamCount; ++p) {
   9651             const TBuiltInVariable biType   = patchConstantFunction[p].getDeclaredBuiltIn();
   9652             TStorageQualifier storage = patchConstantFunction[p].type->getQualifier().storage;
   9653 
   9654             // Track whether there is an output patch param
   9655             if (isOutputPatch(patchConstantFunction, p)) {
   9656                 if (outPatchParam >= 0) {
   9657                     // Presently we only support one per ctrl pt input.
   9658                     error(loc, "unimplemented: multiple output patches in patch constant function", "", "");
   9659                     return;
   9660                 }
   9661                 outPatchParam = p;
   9662             }
   9663 
   9664             if (biType != EbvNone) {
   9665                 TType* paramType = patchConstantFunction[p].type->clone();
   9666 
   9667                 if (storage == EvqConstReadOnly) // treated identically to input
   9668                     storage = EvqIn;
   9669 
   9670                 // Presently, the only non-built-in we support is InputPatch, which is treated as
   9671                 // a pseudo-built-in.
   9672                 if (biType == EbvInputPatch) {
   9673                     builtInTessLinkageSymbols[biType] = inputPatch;
   9674                 } else if (biType == EbvOutputPatch) {
   9675                     // Nothing...
   9676                 } else {
   9677                     // Use the original declaration type for the linkage
   9678                     paramType->getQualifier().builtIn = biType;
   9679 
   9680                     if (notInEntryPoint.count(tInterstageIoData(biType, storage)) == 1)
   9681                         addToLinkage(*paramType, patchConstantFunction[p].name, nullptr);
   9682                 }
   9683             }
   9684         }
   9685 
   9686         // If we didn't find it because the shader made one, add our own.
   9687         if (invocationIdSym == nullptr) {
   9688             TType invocationIdType(EbtUint, EvqIn, 1);
   9689             TString* invocationIdName = NewPoolTString("InvocationId");
   9690             invocationIdType.getQualifier().builtIn = EbvInvocationId;
   9691             addToLinkage(invocationIdType, invocationIdName, &invocationIdSym);
   9692         }
   9693 
   9694         assert(invocationIdSym);
   9695     }
   9696 
   9697     TIntermTyped* pcfArguments = nullptr;
   9698     TVariable* perCtrlPtVar = nullptr;
   9699 
   9700     // ================ Step 1B: Argument synthesis ================
   9701     // Create pcfArguments for synthesis of patchconstantfunction invocation
   9702     {
   9703         for (int p=0; p<pcfParamCount; ++p) {
   9704             TIntermTyped* inputArg = nullptr;
   9705 
   9706             if (p == outPatchParam) {
   9707                 if (perCtrlPtVar == nullptr) {
   9708                     perCtrlPtVar = makeInternalVariable(*patchConstantFunction[outPatchParam].name,
   9709                                                         *patchConstantFunction[outPatchParam].type);
   9710 
   9711                     perCtrlPtVar->getWritableType().getQualifier().makeTemporary();
   9712                 }
   9713                 inputArg = intermediate.addSymbol(*perCtrlPtVar, loc);
   9714             } else {
   9715                 // find which built-in it is
   9716                 const TBuiltInVariable biType = patchConstantFunction[p].getDeclaredBuiltIn();
   9717 
   9718                 if (biType == EbvInputPatch && inputPatch == nullptr) {
   9719                     error(loc, "unimplemented: PCF input patch without entry point input patch parameter", "", "");
   9720                     return;
   9721                 }
   9722 
   9723                 inputArg = findTessLinkageSymbol(biType);
   9724 
   9725                 if (inputArg == nullptr) {
   9726                     error(loc, "unable to find patch constant function built-in variable", "", "");
   9727                     return;
   9728                 }
   9729             }
   9730 
   9731             if (pcfParamCount == 1)
   9732                 pcfArguments = inputArg;
   9733             else
   9734                 pcfArguments = intermediate.growAggregate(pcfArguments, inputArg);
   9735         }
   9736     }
   9737 
   9738     // ================ Step 2: Synthesize call to PCF ================
   9739     TIntermAggregate* pcfCallSequence = nullptr;
   9740     TIntermTyped* pcfCall = nullptr;
   9741 
   9742     {
   9743         // Create a function call to the patchconstantfunction
   9744         if (pcfArguments)
   9745             addInputArgumentConversions(patchConstantFunction, pcfArguments);
   9746 
   9747         // Synthetic call.
   9748         pcfCall = intermediate.setAggregateOperator(pcfArguments, EOpFunctionCall, patchConstantFunction.getType(), loc);
   9749         pcfCall->getAsAggregate()->setUserDefined();
   9750         pcfCall->getAsAggregate()->setName(patchConstantFunction.getMangledName());
   9751         intermediate.addToCallGraph(infoSink, intermediate.getEntryPointMangledName().c_str(),
   9752                                     patchConstantFunction.getMangledName());
   9753 
   9754         if (pcfCall->getAsAggregate()) {
   9755             TQualifierList& qualifierList = pcfCall->getAsAggregate()->getQualifierList();
   9756             for (int i = 0; i < patchConstantFunction.getParamCount(); ++i) {
   9757                 TStorageQualifier qual = patchConstantFunction[i].type->getQualifier().storage;
   9758                 qualifierList.push_back(qual);
   9759             }
   9760             pcfCall = addOutputArgumentConversions(patchConstantFunction, *pcfCall->getAsOperator());
   9761         }
   9762     }
   9763 
   9764     // ================ Step 2B: Per Control Point synthesis ================
   9765     // If there is per control point data, we must either emulate that with multiple
   9766     // invocations of the entry point to build up an array, or (TODO:) use a yet
   9767     // unavailable extension to look across the SIMD lanes.  This is the former
   9768     // as a placeholder for the latter.
   9769     if (outPatchParam >= 0) {
   9770         // We must introduce a local temp variable of the type wanted by the PCF input.
   9771         const int arraySize = patchConstantFunction[outPatchParam].type->getOuterArraySize();
   9772 
   9773         if (entryPointFunction->getType().getBasicType() == EbtVoid) {
   9774             error(loc, "entry point must return a value for use with patch constant function", "", "");
   9775             return;
   9776         }
   9777 
   9778         // Create calls to wrapped main to fill in the array.  We will substitute fixed values
   9779         // of invocation ID when calling the wrapped main.
   9780 
   9781         // This is the type of the each member of the per ctrl point array.
   9782         const TType derefType(perCtrlPtVar->getType(), 0);
   9783 
   9784         for (int cpt = 0; cpt < arraySize; ++cpt) {
   9785             // TODO: improve.  substr(1) here is to avoid the '@' that was grafted on but isn't in the symtab
   9786             // for this function.
   9787             const TString origName = entryPointFunction->getName().substr(1);
   9788             TFunction callee(&origName, TType(EbtVoid));
   9789             TIntermTyped* callingArgs = nullptr;
   9790 
   9791             for (int i = 0; i < entryPointFunction->getParamCount(); i++) {
   9792                 TParameter& param = (*entryPointFunction)[i];
   9793                 TType& paramType = *param.type;
   9794 
   9795                 if (paramType.getQualifier().isParamOutput()) {
   9796                     error(loc, "unimplemented: entry point outputs in patch constant function invocation", "", "");
   9797                     return;
   9798                 }
   9799 
   9800                 if (paramType.getQualifier().isParamInput())  {
   9801                     TIntermTyped* arg = nullptr;
   9802                     if ((*entryPointFunction)[i].getDeclaredBuiltIn() == EbvInvocationId) {
   9803                         // substitute invocation ID with the array element ID
   9804                         arg = intermediate.addConstantUnion(cpt, loc);
   9805                     } else {
   9806                         TVariable* argVar = makeInternalVariable(*param.name, *param.type);
   9807                         argVar->getWritableType().getQualifier().makeTemporary();
   9808                         arg = intermediate.addSymbol(*argVar);
   9809                     }
   9810 
   9811                     handleFunctionArgument(&callee, callingArgs, arg);
   9812                 }
   9813             }
   9814 
   9815             // Call and assign to per ctrl point variable
   9816             currentCaller = intermediate.getEntryPointMangledName().c_str();
   9817             TIntermTyped* callReturn = handleFunctionCall(loc, &callee, callingArgs);
   9818             TIntermTyped* index = intermediate.addConstantUnion(cpt, loc);
   9819             TIntermSymbol* perCtrlPtSym = intermediate.addSymbol(*perCtrlPtVar, loc);
   9820             TIntermTyped* element = intermediate.addIndex(EOpIndexDirect, perCtrlPtSym, index, loc);
   9821             element->setType(derefType);
   9822             element->setLoc(loc);
   9823 
   9824             pcfCallSequence = intermediate.growAggregate(pcfCallSequence,
   9825                                                          handleAssign(loc, EOpAssign, element, callReturn));
   9826         }
   9827     }
   9828 
   9829     // ================ Step 3: Create return Sequence ================
   9830     // Return sequence: copy PCF result to a temporary, then to shader output variable.
   9831     if (pcfCall->getBasicType() != EbtVoid) {
   9832         const TType* retType = &patchConstantFunction.getType();  // return type from the PCF
   9833         TType outType; // output type that goes with the return type.
   9834         outType.shallowCopy(*retType);
   9835 
   9836         // substitute the output type
   9837         const auto newLists = ioTypeMap.find(retType->getStruct());
   9838         if (newLists != ioTypeMap.end())
   9839             outType.setStruct(newLists->second.output);
   9840 
   9841         // Substitute the top level type's built-in type
   9842         if (patchConstantFunction.getDeclaredBuiltInType() != EbvNone)
   9843             outType.getQualifier().builtIn = patchConstantFunction.getDeclaredBuiltInType();
   9844 
   9845         outType.getQualifier().patch = true; // make it a per-patch variable
   9846 
   9847         TVariable* pcfOutput = makeInternalVariable("@patchConstantOutput", outType);
   9848         pcfOutput->getWritableType().getQualifier().storage = EvqVaryingOut;
   9849 
   9850         if (pcfOutput->getType().containsBuiltIn())
   9851             split(*pcfOutput);
   9852 
   9853         assignToInterface(*pcfOutput);
   9854 
   9855         TIntermSymbol* pcfOutputSym = intermediate.addSymbol(*pcfOutput, loc);
   9856 
   9857         // The call to the PCF is a complex R-value: we want to store it in a temp to avoid
   9858         // repeated calls to the PCF:
   9859         TVariable* pcfCallResult = makeInternalVariable("@patchConstantResult", *retType);
   9860         pcfCallResult->getWritableType().getQualifier().makeTemporary();
   9861 
   9862         TIntermSymbol* pcfResultVar = intermediate.addSymbol(*pcfCallResult, loc);
   9863         TIntermNode* pcfResultAssign = handleAssign(loc, EOpAssign, pcfResultVar, pcfCall);
   9864         TIntermNode* pcfResultToOut = handleAssign(loc, EOpAssign, pcfOutputSym,
   9865                                                    intermediate.addSymbol(*pcfCallResult, loc));
   9866 
   9867         pcfCallSequence = intermediate.growAggregate(pcfCallSequence, pcfResultAssign);
   9868         pcfCallSequence = intermediate.growAggregate(pcfCallSequence, pcfResultToOut);
   9869     } else {
   9870         pcfCallSequence = intermediate.growAggregate(pcfCallSequence, pcfCall);
   9871     }
   9872 
   9873     // ================ Step 4: Barrier ================
   9874     TIntermTyped* barrier = new TIntermAggregate(EOpBarrier);
   9875     barrier->setLoc(loc);
   9876     barrier->setType(TType(EbtVoid));
   9877     epBodySeq.insert(epBodySeq.end(), barrier);
   9878 
   9879     // ================ Step 5: Test on invocation ID ================
   9880     TIntermTyped* zero = intermediate.addConstantUnion(0, loc, true);
   9881     TIntermTyped* cmp =  intermediate.addBinaryNode(EOpEqual, invocationIdSym, zero, loc, TType(EbtBool));
   9882 
   9883 
   9884     // ================ Step 5B: Create if statement on Invocation ID == 0 ================
   9885     intermediate.setAggregateOperator(pcfCallSequence, EOpSequence, TType(EbtVoid), loc);
   9886     TIntermTyped* invocationIdTest = new TIntermSelection(cmp, pcfCallSequence, nullptr);
   9887     invocationIdTest->setLoc(loc);
   9888 
   9889     // add our test sequence before the return.
   9890     epBodySeq.insert(epBodySeq.end(), invocationIdTest);
   9891 }
   9892 
   9893 // Finalization step: remove unused buffer blocks from linkage (we don't know until the
   9894 // shader is entirely compiled).
   9895 // Preserve order of remaining symbols.
   9896 void HlslParseContext::removeUnusedStructBufferCounters()
   9897 {
   9898     const auto endIt = std::remove_if(linkageSymbols.begin(), linkageSymbols.end(),
   9899                                       [this](const TSymbol* sym) {
   9900                                           const auto sbcIt = structBufferCounter.find(sym->getName());
   9901                                           return sbcIt != structBufferCounter.end() && !sbcIt->second;
   9902                                       });
   9903 
   9904     linkageSymbols.erase(endIt, linkageSymbols.end());
   9905 }
   9906 
   9907 // Finalization step: patch texture shadow modes to match samplers they were combined with
   9908 void HlslParseContext::fixTextureShadowModes()
   9909 {
   9910     for (auto symbol = linkageSymbols.begin(); symbol != linkageSymbols.end(); ++symbol) {
   9911         TSampler& sampler = (*symbol)->getWritableType().getSampler();
   9912 
   9913         if (sampler.isTexture()) {
   9914             const auto shadowMode = textureShadowVariant.find((*symbol)->getUniqueId());
   9915             if (shadowMode != textureShadowVariant.end()) {
   9916 
   9917                 if (shadowMode->second->overloaded())
   9918                     // Texture needs legalization if it's been seen with both shadow and non-shadow modes.
   9919                     intermediate.setNeedsLegalization();
   9920 
   9921                 sampler.shadow = shadowMode->second->isShadowId((*symbol)->getUniqueId());
   9922             }
   9923         }
   9924     }
   9925 }
   9926 
   9927 // Finalization step: patch append methods to use proper stream output, which isn't known until
   9928 // main is parsed, which could happen after the append method is parsed.
   9929 void HlslParseContext::finalizeAppendMethods()
   9930 {
   9931     TSourceLoc loc;
   9932     loc.init();
   9933 
   9934     // Nothing to do: bypass test for valid stream output.
   9935     if (gsAppends.empty())
   9936         return;
   9937 
   9938     if (gsStreamOutput == nullptr) {
   9939         error(loc, "unable to find output symbol for Append()", "", "");
   9940         return;
   9941     }
   9942 
   9943     // Patch append sequences, now that we know the stream output symbol.
   9944     for (auto append = gsAppends.begin(); append != gsAppends.end(); ++append) {
   9945         append->node->getSequence()[0] =
   9946             handleAssign(append->loc, EOpAssign,
   9947                          intermediate.addSymbol(*gsStreamOutput, append->loc),
   9948                          append->node->getSequence()[0]->getAsTyped());
   9949     }
   9950 }
   9951 
   9952 // post-processing
   9953 void HlslParseContext::finish()
   9954 {
   9955     // Error check: There was a dangling .mips operator.  These are not nested constructs in the grammar, so
   9956     // cannot be detected there.  This is not strictly needed in a non-validating parser; it's just helpful.
   9957     if (! mipsOperatorMipArg.empty()) {
   9958         error(mipsOperatorMipArg.back().loc, "unterminated mips operator:", "", "");
   9959     }
   9960 
   9961     removeUnusedStructBufferCounters();
   9962     addPatchConstantInvocation();
   9963     fixTextureShadowModes();
   9964     finalizeAppendMethods();
   9965 
   9966     // Communicate out (esp. for command line) that we formed AST that will make
   9967     // illegal AST SPIR-V and it needs transforms to legalize it.
   9968     if (intermediate.needsLegalization() && (messages & EShMsgHlslLegalization))
   9969         infoSink.info << "WARNING: AST will form illegal SPIR-V; need to transform to legalize";
   9970 
   9971     TParseContextBase::finish();
   9972 }
   9973 
   9974 } // end namespace glslang
   9975