Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file state.h
     24 *
     25 * @brief Definitions for API state.
     26 *
     27 ******************************************************************************/
     28 #pragma once
     29 
     30 #include "common/formats.h"
     31 #include "common/intrin.h"
     32 using gfxptr_t = unsigned long long;
     33 #include <functional>
     34 #include <algorithm>
     35 
     36 //////////////////////////////////////////////////////////////////////////
     37 /// PRIMITIVE_TOPOLOGY.
     38 //////////////////////////////////////////////////////////////////////////
     39 enum PRIMITIVE_TOPOLOGY
     40 {
     41     TOP_UNKNOWN = 0x0,
     42     TOP_POINT_LIST = 0x1,
     43     TOP_LINE_LIST = 0x2,
     44     TOP_LINE_STRIP = 0x3,
     45     TOP_TRIANGLE_LIST = 0x4,
     46     TOP_TRIANGLE_STRIP = 0x5,
     47     TOP_TRIANGLE_FAN = 0x6,
     48     TOP_QUAD_LIST = 0x7,
     49     TOP_QUAD_STRIP = 0x8,
     50     TOP_LINE_LIST_ADJ = 0x9,
     51     TOP_LISTSTRIP_ADJ = 0xA,
     52     TOP_TRI_LIST_ADJ = 0xB,
     53     TOP_TRI_STRIP_ADJ = 0xC,
     54     TOP_TRI_STRIP_REVERSE = 0xD,
     55     TOP_POLYGON = 0xE,
     56     TOP_RECT_LIST = 0xF,
     57     TOP_LINE_LOOP = 0x10,
     58     TOP_POINT_LIST_BF = 0x11,
     59     TOP_LINE_STRIP_CONT = 0x12,
     60     TOP_LINE_STRIP_BF = 0x13,
     61     TOP_LINE_STRIP_CONT_BF = 0x14,
     62     TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
     63     TOP_TRIANGLE_DISC = 0x17,   /// @todo What is this??
     64 
     65     TOP_PATCHLIST_BASE = 0x1F,  // Invalid topology, used to calculate num verts for a patchlist.
     66     TOP_PATCHLIST_1 = 0x20,     // List of 1-vertex patches
     67     TOP_PATCHLIST_2 = 0x21,
     68     TOP_PATCHLIST_3 = 0x22,
     69     TOP_PATCHLIST_4 = 0x23,
     70     TOP_PATCHLIST_5 = 0x24,
     71     TOP_PATCHLIST_6 = 0x25,
     72     TOP_PATCHLIST_7 = 0x26,
     73     TOP_PATCHLIST_8 = 0x27,
     74     TOP_PATCHLIST_9 = 0x28,
     75     TOP_PATCHLIST_10 = 0x29,
     76     TOP_PATCHLIST_11 = 0x2A,
     77     TOP_PATCHLIST_12 = 0x2B,
     78     TOP_PATCHLIST_13 = 0x2C,
     79     TOP_PATCHLIST_14 = 0x2D,
     80     TOP_PATCHLIST_15 = 0x2E,
     81     TOP_PATCHLIST_16 = 0x2F,
     82     TOP_PATCHLIST_17 = 0x30,
     83     TOP_PATCHLIST_18 = 0x31,
     84     TOP_PATCHLIST_19 = 0x32,
     85     TOP_PATCHLIST_20 = 0x33,
     86     TOP_PATCHLIST_21 = 0x34,
     87     TOP_PATCHLIST_22 = 0x35,
     88     TOP_PATCHLIST_23 = 0x36,
     89     TOP_PATCHLIST_24 = 0x37,
     90     TOP_PATCHLIST_25 = 0x38,
     91     TOP_PATCHLIST_26 = 0x39,
     92     TOP_PATCHLIST_27 = 0x3A,
     93     TOP_PATCHLIST_28 = 0x3B,
     94     TOP_PATCHLIST_29 = 0x3C,
     95     TOP_PATCHLIST_30 = 0x3D,
     96     TOP_PATCHLIST_31 = 0x3E,
     97     TOP_PATCHLIST_32 = 0x3F,   // List of 32-vertex patches
     98 };
     99 
    100 //////////////////////////////////////////////////////////////////////////
    101 /// SWR_SHADER_TYPE
    102 //////////////////////////////////////////////////////////////////////////
    103 enum SWR_SHADER_TYPE
    104 {
    105     SHADER_VERTEX,
    106     SHADER_GEOMETRY,
    107     SHADER_DOMAIN,
    108     SHADER_HULL,
    109     SHADER_PIXEL,
    110     SHADER_COMPUTE,
    111 
    112     NUM_SHADER_TYPES,
    113 };
    114 
    115 //////////////////////////////////////////////////////////////////////////
    116 /// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
    118 //////////////////////////////////////////////////////////////////////////
    119 enum SWR_RENDERTARGET_ATTACHMENT
    120 {
    121     SWR_ATTACHMENT_COLOR0,
    122     SWR_ATTACHMENT_COLOR1,
    123     SWR_ATTACHMENT_COLOR2,
    124     SWR_ATTACHMENT_COLOR3,
    125     SWR_ATTACHMENT_COLOR4,
    126     SWR_ATTACHMENT_COLOR5,
    127     SWR_ATTACHMENT_COLOR6,
    128     SWR_ATTACHMENT_COLOR7,
    129     SWR_ATTACHMENT_DEPTH,
    130     SWR_ATTACHMENT_STENCIL,
    131 
    132     SWR_NUM_ATTACHMENTS
    133 };
    134 
    135 #define SWR_NUM_RENDERTARGETS 8
    136 
    137 #define SWR_ATTACHMENT_COLOR0_BIT 0x001
    138 #define SWR_ATTACHMENT_COLOR1_BIT 0x002
    139 #define SWR_ATTACHMENT_COLOR2_BIT 0x004
    140 #define SWR_ATTACHMENT_COLOR3_BIT 0x008
    141 #define SWR_ATTACHMENT_COLOR4_BIT 0x010
    142 #define SWR_ATTACHMENT_COLOR5_BIT 0x020
    143 #define SWR_ATTACHMENT_COLOR6_BIT 0x040
    144 #define SWR_ATTACHMENT_COLOR7_BIT 0x080
    145 #define SWR_ATTACHMENT_DEPTH_BIT 0x100
    146 #define SWR_ATTACHMENT_STENCIL_BIT 0x200
    147 #define SWR_ATTACHMENT_MASK_ALL 0x3ff
    148 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff
    149 
    150 
    151 //////////////////////////////////////////////////////////////////////////
    152 /// @brief SWR Inner Tessellation factor ID
    153 /// See above GetTessFactorOutputPosition code for documentation
    154 enum SWR_INNER_TESSFACTOR_ID
    155 {
    156     SWR_QUAD_U_TRI_INSIDE,
    157     SWR_QUAD_V_INSIDE,
    158 
    159     SWR_NUM_INNER_TESS_FACTORS,
    160 };
    161 
    162 //////////////////////////////////////////////////////////////////////////
    163 /// @brief SWR Outer Tessellation factor ID
    164 /// See above GetTessFactorOutputPosition code for documentation
    165 enum SWR_OUTER_TESSFACTOR_ID
    166 {
    167     SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
    168     SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
    169     SWR_QUAD_U_EQ1_TRI_W,
    170     SWR_QUAD_V_EQ1,
    171 
    172     SWR_NUM_OUTER_TESS_FACTORS,
    173 };
    174 
    175 
    176 /////////////////////////////////////////////////////////////////////////
    177 /// simdvertex
    178 /// @brief Defines a vertex element that holds all the data for SIMD vertices.
    179 ///        Contains space for position, SGV, and 32 generic attributes
    180 /////////////////////////////////////////////////////////////////////////
    181 enum SWR_VTX_SLOTS
    182 {
    183     VERTEX_SGV_SLOT                 = 0,
    184         VERTEX_SGV_RTAI_COMP        = 0,
    185         VERTEX_SGV_VAI_COMP         = 1,
    186         VERTEX_SGV_POINT_SIZE_COMP  = 2,
    187     VERTEX_POSITION_SLOT            = 1,
    188     VERTEX_POSITION_END_SLOT        = 1,
    189     VERTEX_CLIPCULL_DIST_LO_SLOT    = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
    190     VERTEX_CLIPCULL_DIST_HI_SLOT    = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
    191     VERTEX_ATTRIB_START_SLOT        = (3 + VERTEX_POSITION_END_SLOT),
    192     VERTEX_ATTRIB_END_SLOT          = (34 + VERTEX_POSITION_END_SLOT),
    193     SWR_VTX_NUM_SLOTS               = (1 + VERTEX_ATTRIB_END_SLOT)
    194 };
    195 
    196 // SoAoSoA
    197 struct simdvertex
    198 {
    199     simdvector      attrib[SWR_VTX_NUM_SLOTS];
    200 };
    201 
    202 #if ENABLE_AVX512_SIMD16
    203 struct simd16vertex
    204 {
    205     simd16vector    attrib[SWR_VTX_NUM_SLOTS];
    206 };
    207 
    208 #endif
    209 
    210 template<typename SIMD_T>
    211 struct SIMDVERTEX_T
    212 {
    213     typename SIMD_T::Vec4               attrib[SWR_VTX_NUM_SLOTS];
    214 };
    215 
    216 //////////////////////////////////////////////////////////////////////////
    217 /// SWR_VS_CONTEXT
    218 /// @brief Input to vertex shader
    219 /////////////////////////////////////////////////////////////////////////
    220 struct SWR_VS_CONTEXT
    221 {
    222     simdvertex* pVin;           // IN: SIMD input vertex data store
    223     simdvertex* pVout;          // OUT: SIMD output vertex data store
    224 
    225     uint32_t InstanceID;        // IN: Instance ID, constant across all verts of the SIMD
    226     simdscalari VertexID;       // IN: Vertex ID
    227     simdscalari mask;           // IN: Active mask for shader
    228 #if USE_SIMD16_FRONTEND
    229     uint32_t AlternateOffset;   // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
    230 #if USE_SIMD16_VS
    231     simd16scalari mask16;	// IN: Active mask for shader (16-wide)
    232     simd16scalari VertexID16;	// IN: Vertex ID (16-wide)
    233 #endif
    234 #endif
    235 };
    236 
    237 /////////////////////////////////////////////////////////////////////////
    238 /// ScalarCPoint
    239 /// @brief defines a control point element as passed from the output
    240 /// of the hull shader to the input of the domain shader
    241 /////////////////////////////////////////////////////////////////////////
    242 struct ScalarAttrib
    243 {
    244     float x;
    245     float y;
    246     float z;
    247     float w;
    248 };
    249 
    250 struct ScalarCPoint
    251 {
    252     ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
    253 };
    254 
    255 //////////////////////////////////////////////////////////////////////////
    256 /// SWR_TESSELLATION_FACTORS
    257 /// @brief Tessellation factors structure (non-vector)
    258 /////////////////////////////////////////////////////////////////////////
    259 struct SWR_TESSELLATION_FACTORS
    260 {
    261     float  OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS];
    262     float  InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS];
    263 };
    264 
    265 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
    266 struct ScalarPatch
    267 {
    268     SWR_TESSELLATION_FACTORS tessFactors;
    269     ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
    270     ScalarCPoint patchData;
    271 };
    272 
    273 //////////////////////////////////////////////////////////////////////////
    274 /// SWR_HS_CONTEXT
    275 /// @brief Input to hull shader
    276 /////////////////////////////////////////////////////////////////////////
    277 struct SWR_HS_CONTEXT
    278 {
    279     simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
    280     simdscalari PrimitiveID;    // IN: (SIMD) primitive ID generated from the draw call
    281     simdscalari mask;           // IN: Active mask for shader
    282     ScalarPatch* pCPout;        // OUT: Output control point patch
    283                                 // SIMD-sized-array of SCALAR patches
    284 };
    285 
    286 //////////////////////////////////////////////////////////////////////////
    287 /// SWR_DS_CONTEXT
    288 /// @brief Input to domain shader
    289 /////////////////////////////////////////////////////////////////////////
    290 struct SWR_DS_CONTEXT
    291 {
    292     uint32_t        PrimitiveID;    // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
    293     uint32_t        vectorOffset;   // IN: (SCALAR) vector index offset into SIMD data.
    294     uint32_t        vectorStride;   // IN: (SCALAR) stride (in vectors) of output data per attribute-component
    295     uint32_t        outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
    296     ScalarPatch*    pCpIn;          // IN: (SCALAR) Control patch
    297     simdscalar*     pDomainU;       // IN: (SIMD) Domain Point U coords
    298     simdscalar*     pDomainV;       // IN: (SIMD) Domain Point V coords
    299     simdscalari     mask;           // IN: Active mask for shader
    300     simdscalar*     pOutputData;    // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
    301 };
    302 
    303 //////////////////////////////////////////////////////////////////////////
    304 /// SWR_GS_CONTEXT
    305 /// @brief Input to geometry shader.
    306 /////////////////////////////////////////////////////////////////////////
    307 struct SWR_GS_CONTEXT
    308 {
    309     simdvector* pVerts;                 // IN: input primitive data for SIMD prims
    310     uint32_t inputVertStride;           // IN: input vertex stride, in attributes
    311     simdscalari PrimitiveID;            // IN: input primitive ID generated from the draw call
    312     uint32_t InstanceID;                // IN: input instance ID
    313     simdscalari mask;                   // IN: Active mask for shader
    314     uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
    315 };
    316 
    317 struct PixelPositions
    318 {
    319     simdscalar UL;
    320     simdscalar center;
    321     simdscalar sample;
    322     simdscalar centroid;
    323 };
    324 
    325 #define SWR_MAX_NUM_MULTISAMPLES 16
    326 
    327 //////////////////////////////////////////////////////////////////////////
    328 /// SWR_PS_CONTEXT
    329 /// @brief Input to pixel shader.
    330 /////////////////////////////////////////////////////////////////////////
    331 struct SWR_PS_CONTEXT
    332 {
    333     PixelPositions vX;          // IN: x location(s) of pixels
    334     PixelPositions vY;          // IN: x location(s) of pixels
    335     simdscalar vZ;              // INOUT: z location of pixels
    336     simdscalari activeMask;     // OUT: mask for kill
    337     simdscalar  inputMask;      // IN: input coverage mask for all samples
    338     simdscalari oMask;          // OUT: mask for output coverage
    339 
    340     PixelPositions vI;          // barycentric coords evaluated at pixel center, sample position, centroid
    341     PixelPositions vJ;
    342     PixelPositions vOneOverW;   // IN: 1/w
    343 
    344     const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
    345     const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
    346     const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
    347     const float *I;             // IN: Barycentric A, B, and C coefs used to compute I
    348     const float *J;             // IN: Barycentric A, B, and C coefs used to compute J
    349     float recipDet;             // IN: 1/Det, used when barycentric interpolating attributes
    350     const float* pSamplePosX;   // IN: array of sample positions
    351     const float* pSamplePosY;   // IN: array of sample positions
    352     simdvector shaded[SWR_NUM_RENDERTARGETS];
    353                                 // OUT: result color per rendertarget
    354 
    355     uint32_t frontFace;                 // IN: front- 1, back- 0
    356     uint32_t sampleIndex;               // IN: sampleIndex
    357     uint32_t renderTargetArrayIndex;    // IN: render target array index from GS
    358     uint32_t rasterizerSampleCount;     // IN: sample count used by the rasterizer
    359 
    360     uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
    361 };
    362 
    363 //////////////////////////////////////////////////////////////////////////
    364 /// SWR_CS_CONTEXT
    365 /// @brief Input to compute shader.
    366 /////////////////////////////////////////////////////////////////////////
    367 struct SWR_CS_CONTEXT
    368 {
    369     // The ThreadGroupId is the current thread group index relative
    370     // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
    371     // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.
    372 
    373     // Compute shader accepts the following system values.
    374     // o ThreadId - Current thread id relative to all other threads in dispatch.
    375     // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
    376     // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
    377     // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    378     //
    379     // All of these system values can be computed in the shader. They will be
    380     // derived from the current tile counter. The tile counter is an atomic counter that
    381     // resides in the draw context and is initialized to the product of the dispatch dims.
    382     //
    383     //  tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    384     //
    385     // Each CPU worker thread will atomically decrement this counter and passes the current
    386     // count into the shader. When the count reaches 0 then all thread groups in the
    387     // dispatch call have been completed.
    388 
    389     uint32_t tileCounter;  // The tile counter value for this thread group.
    390 
    391     // Dispatch dimensions used by shader to compute system values from the tile counter.
    392     uint32_t dispatchDims[3];
    393 
    394     uint8_t* pTGSM;  // Thread Group Shared Memory pointer.
    395 
    396     uint8_t* pSpillFillBuffer;  // Spill/fill buffer for barrier support
    397 
    398     uint8_t* pScratchSpace;     // Pointer to scratch space buffer used by the shader, shader is responsible
    399                                 // for subdividing scratch space per instance/simd
    400 
    401     uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
    402 };
    403 
    404 // enums
    405 enum SWR_TILE_MODE
    406 {
    407     SWR_TILE_NONE = 0x0,    // Linear mode (no tiling)
    408     SWR_TILE_MODE_WMAJOR,   // W major tiling
    409     SWR_TILE_MODE_XMAJOR,   // X major tiling
    410     SWR_TILE_MODE_YMAJOR,   // Y major tiling
    411     SWR_TILE_SWRZ,          // SWR-Z tiling
    412 
    413     SWR_TILE_MODE_COUNT
    414 };
    415 
    416 enum SWR_SURFACE_TYPE
    417 {
    418     SURFACE_1D        = 0,
    419     SURFACE_2D        = 1,
    420     SURFACE_3D        = 2,
    421     SURFACE_CUBE      = 3,
    422     SURFACE_BUFFER    = 4,
    423     SURFACE_STRUCTURED_BUFFER = 5,
    424     SURFACE_NULL       = 7
    425 };
    426 
    427 enum SWR_ZFUNCTION
    428 {
    429     ZFUNC_ALWAYS,
    430     ZFUNC_NEVER,
    431     ZFUNC_LT,
    432     ZFUNC_EQ,
    433     ZFUNC_LE,
    434     ZFUNC_GT,
    435     ZFUNC_NE,
    436     ZFUNC_GE,
    437     NUM_ZFUNC
    438 };
    439 
    440 enum SWR_STENCILOP
    441 {
    442     STENCILOP_KEEP,
    443     STENCILOP_ZERO,
    444     STENCILOP_REPLACE,
    445     STENCILOP_INCRSAT,
    446     STENCILOP_DECRSAT,
    447     STENCILOP_INCR,
    448     STENCILOP_DECR,
    449     STENCILOP_INVERT
    450 };
    451 
    452 enum SWR_BLEND_FACTOR
    453 {
    454     BLENDFACTOR_ONE,
    455     BLENDFACTOR_SRC_COLOR,
    456     BLENDFACTOR_SRC_ALPHA,
    457     BLENDFACTOR_DST_ALPHA,
    458     BLENDFACTOR_DST_COLOR,
    459     BLENDFACTOR_SRC_ALPHA_SATURATE,
    460     BLENDFACTOR_CONST_COLOR,
    461     BLENDFACTOR_CONST_ALPHA,
    462     BLENDFACTOR_SRC1_COLOR,
    463     BLENDFACTOR_SRC1_ALPHA,
    464     BLENDFACTOR_ZERO,
    465     BLENDFACTOR_INV_SRC_COLOR,
    466     BLENDFACTOR_INV_SRC_ALPHA,
    467     BLENDFACTOR_INV_DST_ALPHA,
    468     BLENDFACTOR_INV_DST_COLOR,
    469     BLENDFACTOR_INV_CONST_COLOR,
    470     BLENDFACTOR_INV_CONST_ALPHA,
    471     BLENDFACTOR_INV_SRC1_COLOR,
    472     BLENDFACTOR_INV_SRC1_ALPHA
    473 };
    474 
    475 enum SWR_BLEND_OP
    476 {
    477     BLENDOP_ADD,
    478     BLENDOP_SUBTRACT,
    479     BLENDOP_REVSUBTRACT,
    480     BLENDOP_MIN,
    481     BLENDOP_MAX,
    482 };
    483 
    484 enum SWR_LOGIC_OP
    485 {
    486     LOGICOP_CLEAR,
    487     LOGICOP_NOR,
    488     LOGICOP_AND_INVERTED,
    489     LOGICOP_COPY_INVERTED,
    490     LOGICOP_AND_REVERSE,
    491     LOGICOP_INVERT,
    492     LOGICOP_XOR,
    493     LOGICOP_NAND,
    494     LOGICOP_AND,
    495     LOGICOP_EQUIV,
    496     LOGICOP_NOOP,
    497     LOGICOP_OR_INVERTED,
    498     LOGICOP_COPY,
    499     LOGICOP_OR_REVERSE,
    500     LOGICOP_OR,
    501     LOGICOP_SET,
    502 };
    503 
    504 //////////////////////////////////////////////////////////////////////////
    505 /// SWR_AUX_MODE
    506 /// @brief Specifies how the auxiliary buffer is used by the driver.
    507 //////////////////////////////////////////////////////////////////////////
    508 enum SWR_AUX_MODE
    509 {
    510     AUX_MODE_NONE,
    511     AUX_MODE_COLOR,
    512     AUX_MODE_UAV,
    513     AUX_MODE_DEPTH,
    514 };
    515 
    516 //////////////////////////////////////////////////////////////////////////
    517 /// SWR_SURFACE_STATE
    518 //////////////////////////////////////////////////////////////////////////
    519 struct SWR_SURFACE_STATE
    520 {
    521     gfxptr_t xpBaseAddress;
    522     SWR_SURFACE_TYPE type;  // @llvm_enum
    523     SWR_FORMAT format;      // @llvm_enum
    524     uint32_t width;
    525     uint32_t height;
    526     uint32_t depth;
    527     uint32_t numSamples;
    528     uint32_t samplePattern;
    529     uint32_t pitch;
    530     uint32_t qpitch;
    531     uint32_t minLod;            // for sampled surfaces, the most detailed LOD that can be accessed by sampler
    532     uint32_t maxLod;            // for sampled surfaces, the max LOD that can be accessed
    533     float resourceMinLod;       // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
    534     uint32_t lod;               // for render targets, the lod being rendered to
    535     uint32_t arrayIndex;        // for render targets, the array index being rendered to for arrayed surfaces
    536     SWR_TILE_MODE tileMode;     // @llvm_enum
    537     uint32_t halign;
    538     uint32_t valign;
    539     uint32_t xOffset;
    540     uint32_t yOffset;
    541 
    542     uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces
    543 
    544     gfxptr_t xpAuxBaseAddress;   // Used for compression, append/consume counter, etc.
    545     SWR_AUX_MODE auxMode;      // @llvm_enum
    546 
    547 
    548     bool bInterleavedSamples;   // are MSAA samples stored interleaved or planar
    549 };
    550 
    551 // vertex fetch state
    552 // WARNING- any changes to this struct need to be reflected
    553 // in the fetch shader jit
    554 struct SWR_VERTEX_BUFFER_STATE
    555 {
    556     uint32_t index;
    557     uint32_t pitch;
    558     const uint8_t *pData;
    559     uint32_t size;
    560     uint32_t numaNode;
    561     uint32_t minVertex;             // min vertex (for bounds checking)
    562     uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
    563     uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
    564 };
    565 
    566 struct SWR_INDEX_BUFFER_STATE
    567 {
    568     // Format type for indices (e.g. UINT16, UINT32, etc.)
    569     SWR_FORMAT format; // @llvm_enum
    570     const void *pIndices;
    571     uint32_t size;
    572 };
    573 
    574 
    575 //////////////////////////////////////////////////////////////////////////
    576 /// SWR_FETCH_CONTEXT
    577 /// @brief Input to fetch shader.
    578 /// @note WARNING - Changes to this struct need to be reflected in the
    579 ///                 fetch shader jit.
    580 /////////////////////////////////////////////////////////////////////////
    581 struct SWR_FETCH_CONTEXT
    582 {
    583     const SWR_VERTEX_BUFFER_STATE* pStreams;    // IN: array of bound vertex buffers
    584     const int32_t* pIndices;                    // IN: pointer to index buffer for indexed draws
    585     const int32_t* pLastIndex;                  // IN: pointer to end of index buffer, used for bounds checking
    586     uint32_t CurInstance;                       // IN: current instance
    587     uint32_t BaseVertex;                        // IN: base vertex
    588     uint32_t StartVertex;                       // IN: start vertex
    589     uint32_t StartInstance;                     // IN: start instance
    590     simdscalari VertexID;                       // OUT: vector of vertex IDs
    591     simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
    592 #if USE_SIMD16_SHADERS
    593 //    simd16scalari VertexID;                     // OUT: vector of vertex IDs
    594 //    simd16scalari CutMask;                      // OUT: vector mask of indices which have the cut index value
    595     simdscalari VertexID2;                      // OUT: vector of vertex IDs
    596     simdscalari CutMask2;                       // OUT: vector mask of indices which have the cut index value
    597 #endif
    598 };
    599 
    600 //////////////////////////////////////////////////////////////////////////
    601 /// SWR_STATS
    602 ///
    603 /// @brief All statistics generated by SWR go here. These are public
    604 ///        to driver.
    605 /////////////////////////////////////////////////////////////////////////
    606 OSALIGNLINE(struct) SWR_STATS
    607 {
    608     // Occlusion Query
    609     uint64_t DepthPassCount; // Number of passing depth tests. Not exact.
    610 
    611     // Pipeline Stats
    612     uint64_t PsInvocations;  // Number of Pixel Shader invocations
    613     uint64_t CsInvocations;  // Number of Compute Shader invocations
    614 
    615 };
    616 
    617 //////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
    619 ///
    620 /// @brief All statistics generated by FE.
    621 /////////////////////////////////////////////////////////////////////////
    622 OSALIGNLINE(struct) SWR_STATS_FE
    623 {
    624     uint64_t IaVertices;    // Number of Fetch Shader vertices
    625     uint64_t IaPrimitives;  // Number of PA primitives.
    626     uint64_t VsInvocations; // Number of Vertex Shader invocations
    627     uint64_t HsInvocations; // Number of Hull Shader invocations
    628     uint64_t DsInvocations; // Number of Domain Shader invocations
    629     uint64_t GsInvocations; // Number of Geometry Shader invocations
    630     uint64_t GsPrimitives;  // Number of prims GS outputs.
    631     uint64_t CInvocations;  // Number of clipper invocations
    632     uint64_t CPrimitives;   // Number of clipper primitives.
    633 
    634     // Streamout Stats
    635     uint64_t SoPrimStorageNeeded[4];
    636     uint64_t SoNumPrimsWritten[4];
    637 };
    638 
    639 //////////////////////////////////////////////////////////////////////////
    640 /// STREAMOUT_BUFFERS
    641 /////////////////////////////////////////////////////////////////////////
    642 
    643 #define MAX_SO_STREAMS 4
    644 #define MAX_SO_BUFFERS 4
    645 #define MAX_ATTRIBUTES 32
    646 
    647 struct SWR_STREAMOUT_BUFFER
    648 {
    649     bool enable;
    650     bool soWriteEnable;
    651 
    652     // Pointers to streamout buffers.
    653     uint32_t* pBuffer;
    654 
    655     // Size of buffer in dwords.
    656     uint32_t bufferSize;
    657 
    658     // Vertex pitch of buffer in dwords.
    659     uint32_t pitch;
    660 
    661     // Offset into buffer in dwords. SOS will increment this offset.
    662     uint32_t streamOffset;
    663 
    664     // Offset to the SO write offset. If not null then we update offset here.
    665     uint32_t* pWriteOffset;
    666 
    667 };
    668 
    669 //////////////////////////////////////////////////////////////////////////
    670 /// STREAMOUT_STATE
    671 /////////////////////////////////////////////////////////////////////////
    672 struct SWR_STREAMOUT_STATE
    673 {
    674     // This disables stream output.
    675     bool soEnable;
    676 
    677     // which streams are enabled for streamout
    678     bool streamEnable[MAX_SO_STREAMS];
    679 
    680     // If set then do not send any streams to the rasterizer.
    681     bool rasterizerDisable;
    682 
    683     // Specifies which stream to send to the rasterizer.
    684     uint32_t streamToRasterizer;
    685 
    686     // The stream masks specify which attributes are sent to which streams.
    687     // These masks help the FE to setup the pPrimData buffer that is passed
    688     // the Stream Output Shader (SOS) function.
    689     uint32_t streamMasks[MAX_SO_STREAMS];
    690 
    691     // Number of attributes, including position, per vertex that are streamed out.
    692     // This should match number of bits in stream mask.
    693     uint32_t streamNumEntries[MAX_SO_STREAMS];
    694 
    695     // Offset to the start of the attributes of the input vertices, in simdvector units
    696     uint32_t vertexAttribOffset[MAX_SO_STREAMS];
    697 };
    698 
    699 //////////////////////////////////////////////////////////////////////////
    700 /// STREAMOUT_CONTEXT - Passed to SOS
    701 /////////////////////////////////////////////////////////////////////////
    702 struct SWR_STREAMOUT_CONTEXT
    703 {
    704     uint32_t* pPrimData;
    705     SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];
    706 
    707     // Num prims written for this stream
    708     uint32_t numPrimsWritten;
    709 
    710     // Num prims that should have been written if there were no overflow.
    711     uint32_t numPrimStorageNeeded;
    712 };
    713 
    714 //////////////////////////////////////////////////////////////////////////
    715 /// SWR_GS_STATE - Geometry shader state
    716 /////////////////////////////////////////////////////////////////////////
    717 struct SWR_GS_STATE
    718 {
    719     bool gsEnable;
    720 
    721     // Number of input attributes per vertex. Used by the frontend to
    722     // optimize assembling primitives for GS
    723     uint32_t numInputAttribs;
    724 
    725     // Stride of incoming verts in attributes
    726     uint32_t inputVertStride;
    727 
    728     // Output topology - can be point, tristrip, or linestrip
    729     PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum
    730 
    731     // Maximum number of verts that can be emitted by a single instance of the GS
    732     uint32_t maxNumVerts;
    733 
    734     // Instance count
    735     uint32_t instanceCount;
    736 
    737     // If true, geometry shader emits a single stream, with separate cut buffer.
    738     // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
    739     // to map vertices to streams
    740     bool isSingleStream;
    741 
    742     // When single stream is enabled, singleStreamID dictates which stream is being output.
    743     // field ignored if isSingleStream is false
    744     uint32_t singleStreamID;
    745 
    746     // Total amount of memory to allocate for one instance of the shader output in bytes
    747     uint32_t allocationSize;
    748 
    749     // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
    750     uint32_t vertexAttribOffset;
    751 
    752     // Offset to the attributes as stored by the preceding shader stage.
    753     uint32_t srcVertexAttribOffset;
    754 
    755     // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
    756     // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
    757     uint32_t controlDataSize;
    758 
    759     // Offset to the control data section, in bytes
    760     uint32_t controlDataOffset;
    761 
    762     // Total size of an output vertex, in simdvector units
    763     uint32_t outputVertexSize;
    764 
    765     // Offset to the start of the vertex section, in bytes
    766     uint32_t outputVertexOffset;
    767 
    768     // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
    769     // expected to store the final vertex count in the first dword of the gs output stream.
    770     uint32_t staticVertexCount;
    771 };
    772 
    773 
    774 //////////////////////////////////////////////////////////////////////////
    775 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
    776 /////////////////////////////////////////////////////////////////////////
    777 enum SWR_TS_OUTPUT_TOPOLOGY
    778 {
    779     SWR_TS_OUTPUT_POINT,
    780     SWR_TS_OUTPUT_LINE,
    781     SWR_TS_OUTPUT_TRI_CW,
    782     SWR_TS_OUTPUT_TRI_CCW,
    783 
    784     SWR_TS_OUTPUT_TOPOLOGY_COUNT
    785 };
    786 
    787 //////////////////////////////////////////////////////////////////////////
    788 /// SWR_TS_PARTITIONING - Defines tessellation algorithm
    789 /////////////////////////////////////////////////////////////////////////
    790 enum SWR_TS_PARTITIONING
    791 {
    792     SWR_TS_INTEGER,
    793     SWR_TS_ODD_FRACTIONAL,
    794     SWR_TS_EVEN_FRACTIONAL,
    795 
    796     SWR_TS_PARTITIONING_COUNT
    797 };
    798 
    799 //////////////////////////////////////////////////////////////////////////
    800 /// SWR_TS_DOMAIN - Defines Tessellation Domain
    801 /////////////////////////////////////////////////////////////////////////
    802 enum SWR_TS_DOMAIN
    803 {
    804     SWR_TS_QUAD,
    805     SWR_TS_TRI,
    806     SWR_TS_ISOLINE,
    807 
    808     SWR_TS_DOMAIN_COUNT
    809 };
    810 
    811 //////////////////////////////////////////////////////////////////////////
    812 /// SWR_TS_STATE - Tessellation state
    813 /////////////////////////////////////////////////////////////////////////
    814 struct SWR_TS_STATE
    815 {
    816     bool                    tsEnable;
    817     SWR_TS_OUTPUT_TOPOLOGY  tsOutputTopology;   // @llvm_enum
    818     SWR_TS_PARTITIONING     partitioning;       // @llvm_enum
    819     SWR_TS_DOMAIN           domain;             // @llvm_enum
    820 
    821     PRIMITIVE_TOPOLOGY      postDSTopology;     // @llvm_enum
    822 
    823     uint32_t                numHsInputAttribs;
    824     uint32_t                numHsOutputAttribs;
    825     uint32_t                numDsOutputAttribs;
    826     uint32_t                dsAllocationSize;
    827     uint32_t                dsOutVtxAttribOffset;
    828 
    829     // Offset to the start of the attributes of the input vertices, in simdvector units
    830     uint32_t                vertexAttribOffset;
    831 };
    832 
    833 // output merger state
    834 struct SWR_RENDER_TARGET_BLEND_STATE
    835 {
    836     uint8_t writeDisableRed : 1;
    837     uint8_t writeDisableGreen : 1;
    838     uint8_t writeDisableBlue : 1;
    839     uint8_t writeDisableAlpha : 1;
    840 };
    841 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");
    842 
    843 enum SWR_MULTISAMPLE_COUNT
    844 {
    845     SWR_MULTISAMPLE_1X = 0,
    846     SWR_MULTISAMPLE_2X,
    847     SWR_MULTISAMPLE_4X,
    848     SWR_MULTISAMPLE_8X,
    849     SWR_MULTISAMPLE_16X,
    850     SWR_MULTISAMPLE_TYPE_COUNT
    851 };
    852 
    853 INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
    854 {
    855     static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
    856     assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
    857     return sampleCountLUT[sampleCount];
    858 } // @llvm_func_end
    859 
    860 struct SWR_BLEND_STATE
    861 {
    862     // constant blend factor color in RGBA float
    863     float constantColor[4];
    864 
    865     // alpha test reference value in unorm8 or float32
    866     uint32_t alphaTestReference;
    867     uint32_t sampleMask;
    868     // all RT's have the same sample count
    869     ///@todo move this to Output Merger state when we refactor
    870     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
    871 
    872     SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
    873 };
    874 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");
    875 
    876 //////////////////////////////////////////////////////////////////////////
    877 /// FUNCTION POINTERS FOR SHADERS
    878 
    879 #if USE_SIMD16_SHADERS
    880 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
    881 #else
    882 typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
    883 #endif
    884 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
    885 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
    886 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
    887 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
    888 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
    889 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
    890 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
    891 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
    892 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
    893     simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
    894     uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
    895 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);
    896 
    897 
    898 
    899 //////////////////////////////////////////////////////////////////////////
    900 /// FRONTEND_STATE
    901 /////////////////////////////////////////////////////////////////////////
    902 struct SWR_FRONTEND_STATE
    903 {
    904     // skip clip test, perspective divide, and viewport transform
    905     // intended for verts in screen space
    906     bool vpTransformDisable;
    907     bool bEnableCutIndex;
    908     union
    909     {
    910         struct
    911         {
    912             uint32_t triFan : 2;
    913             uint32_t lineStripList : 1;
    914             uint32_t triStripList : 2;
    915         };
    916         uint32_t bits;
    917     } provokingVertex;
    918     uint32_t topologyProvokingVertex; // provoking vertex for the draw topology
    919 
    920     // Size of a vertex in simdvector units. Should be sized to the
    921     // maximum of the input/output of the vertex shader.
    922     uint32_t vsVertexSize;
    923 };
    924 
    925 //////////////////////////////////////////////////////////////////////////
    926 /// VIEWPORT_MATRIX
    927 /////////////////////////////////////////////////////////////////////////
    928 struct SWR_VIEWPORT_MATRIX
    929 {
    930     float m00;
    931     float m11;
    932     float m22;
    933     float m30;
    934     float m31;
    935     float m32;
    936 };
    937 
    938 //////////////////////////////////////////////////////////////////////////
    939 /// VIEWPORT_MATRIXES
    940 /////////////////////////////////////////////////////////////////////////
    941 struct SWR_VIEWPORT_MATRICES
    942 {
    943     float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
    944     float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
    945     float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
    946     float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
    947     float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
    948     float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
    949 };
    950 
    951 //////////////////////////////////////////////////////////////////////////
    952 /// SWR_VIEWPORT
    953 /////////////////////////////////////////////////////////////////////////
    954 struct SWR_VIEWPORT
    955 {
    956     float x;
    957     float y;
    958     float width;
    959     float height;
    960     float minZ;
    961     float maxZ;
    962 };
    963 
    964 //////////////////////////////////////////////////////////////////////////
    965 /// SWR_CULLMODE
    966 //////////////////////////////////////////////////////////////////////////
    967 enum SWR_CULLMODE
    968 {
    969     SWR_CULLMODE_BOTH,
    970     SWR_CULLMODE_NONE,
    971     SWR_CULLMODE_FRONT,
    972     SWR_CULLMODE_BACK
    973 };
    974 
    975 enum SWR_FILLMODE
    976 {
    977     SWR_FILLMODE_POINT,
    978     SWR_FILLMODE_WIREFRAME,
    979     SWR_FILLMODE_SOLID
    980 };
    981 
    982 enum SWR_FRONTWINDING
    983 {
    984     SWR_FRONTWINDING_CW,
    985     SWR_FRONTWINDING_CCW
    986 };
    987 
    988 
    989 enum SWR_PIXEL_LOCATION
    990 {
    991     SWR_PIXEL_LOCATION_CENTER,
    992     SWR_PIXEL_LOCATION_UL,
    993 };
    994 
    995 // fixed point screen space sample locations within a pixel
    996 struct SWR_MULTISAMPLE_POS
    997 {
    998 public:
    999     INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
   1000     INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func
   1001     INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func
   1002     INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func
   1003     INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func
   1004     INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func
   1005     INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func
   1006     INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func
   1007     typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef
   1008     INLINE sampleArrayT X() const { return _x; }; // @llvm_func
   1009     INLINE sampleArrayT Y() const { return _y; }; // @llvm_func
   1010     INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func
   1011     INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func
   1012     INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func
   1013     INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func
   1014     INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func
   1015     INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func
   1016 
   1017     INLINE void PrecalcSampleData(int numSamples); //@llvm_func
   1018 
   1019 private:
   1020     template <typename MaskT>
   1021     INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func
   1022     INLINE void CalcTileSampleOffsets(int numSamples);   // @llvm_func
   1023 
   1024     // scalar sample values
   1025     uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
   1026     uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES];
   1027     float _x[SWR_MAX_NUM_MULTISAMPLES];
   1028     float _y[SWR_MAX_NUM_MULTISAMPLES];
   1029 
   1030     // precalc'd / vectorized samples
   1031     __m128i _vXi[SWR_MAX_NUM_MULTISAMPLES];
   1032     __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES];
   1033     simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES];
   1034     simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES];
   1035     __m128i tileSampleOffsetsX;
   1036     __m128i tileSampleOffsetsY;
   1037 };
   1038 
   1039 //////////////////////////////////////////////////////////////////////////
   1040 /// SWR_RASTSTATE
   1041 //////////////////////////////////////////////////////////////////////////
   1042 struct SWR_RASTSTATE
   1043 {
   1044     uint32_t cullMode               : 2;
   1045     uint32_t fillMode               : 2;
   1046     uint32_t frontWinding           : 1;
   1047     uint32_t scissorEnable          : 1;
   1048     uint32_t depthClipEnable        : 1;
   1049     uint32_t clipHalfZ              : 1;
   1050     uint32_t pointParam             : 1;
   1051     uint32_t pointSpriteEnable      : 1;
   1052     uint32_t pointSpriteTopOrigin   : 1;
   1053     uint32_t forcedSampleCount      : 1;
   1054     uint32_t pixelOffset            : 1;
   1055     uint32_t depthBiasPreAdjusted   : 1;    ///< depth bias constant is in float units, not per-format Z units
   1056     uint32_t conservativeRast       : 1;
   1057 
   1058     float pointSize;
   1059     float lineWidth;
   1060 
   1061     float depthBias;
   1062     float slopeScaledDepthBias;
   1063     float depthBiasClamp;
   1064     SWR_FORMAT depthFormat;     // @llvm_enum
   1065 
   1066     // sample count the rasterizer is running at
   1067     SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum
   1068     uint32_t pixelLocation;     // UL or Center
   1069     SWR_MULTISAMPLE_POS samplePositions;    // @llvm_struct
   1070     bool bIsCenterPattern;   // @llvm_enum
   1071 };
   1072 
   1073 
   1074 enum SWR_CONSTANT_SOURCE
   1075 {
   1076     SWR_CONSTANT_SOURCE_CONST_0000,
   1077     SWR_CONSTANT_SOURCE_CONST_0001_FLOAT,
   1078     SWR_CONSTANT_SOURCE_CONST_1111_FLOAT,
   1079     SWR_CONSTANT_SOURCE_PRIM_ID
   1080 };
   1081 
   1082 struct SWR_ATTRIB_SWIZZLE
   1083 {
   1084     uint16_t sourceAttrib : 5;          // source attribute
   1085     uint16_t constantSource : 2;        // constant source to apply
   1086     uint16_t componentOverrideMask : 4; // override component with constant source
   1087 };
   1088 
   1089 // backend state
   1090 struct SWR_BACKEND_STATE
   1091 {
   1092     uint32_t constantInterpolationMask;     // bitmask indicating which attributes have constant interpolation
   1093     uint32_t pointSpriteTexCoordMask;       // bitmask indicating the attribute(s) which should be interpreted as tex coordinates
   1094 
   1095     uint8_t numAttributes;                  // total number of attributes to send to backend (up to 32)
   1096     uint8_t numComponents[32];              // number of components to setup per attribute, this reduces some calculations for unneeded components
   1097 
   1098     bool swizzleEnable;                 // when enabled, core will parse the swizzle map when
   1099                                         // setting up attributes for the backend, otherwise
   1100                                         // all attributes up to numAttributes will be sent
   1101     SWR_ATTRIB_SWIZZLE swizzleMap[32];
   1102 
   1103     bool readRenderTargetArrayIndex;    // Forward render target array index from last FE stage to the backend
   1104     bool readViewportArrayIndex;        // Read viewport array index from last FE stage during binning
   1105 
   1106 	// Offset to the start of the attributes of the input vertices, in simdvector units
   1107     uint32_t vertexAttribOffset;
   1108 
   1109     // User clip/cull distance enables
   1110     uint8_t cullDistanceMask;
   1111     uint8_t clipDistanceMask;
   1112 
   1113     // Offset to clip/cull attrib section of the vertex, in simdvector units
   1114     uint32_t vertexClipCullOffset;
   1115 };
   1116 
   1117 
   1118 union SWR_DEPTH_STENCIL_STATE
   1119 {
   1120     struct
   1121     {
   1122         // dword 0
   1123         uint32_t depthWriteEnable : 1;
   1124         uint32_t depthTestEnable : 1;
   1125         uint32_t stencilWriteEnable : 1;
   1126         uint32_t stencilTestEnable : 1;
   1127         uint32_t doubleSidedStencilTestEnable : 1;
   1128 
   1129         uint32_t depthTestFunc : 3;
   1130         uint32_t stencilTestFunc : 3;
   1131 
   1132         uint32_t backfaceStencilPassDepthPassOp : 3;
   1133         uint32_t backfaceStencilPassDepthFailOp : 3;
   1134         uint32_t backfaceStencilFailOp : 3;
   1135         uint32_t backfaceStencilTestFunc : 3;
   1136         uint32_t stencilPassDepthPassOp : 3;
   1137         uint32_t stencilPassDepthFailOp : 3;
   1138         uint32_t stencilFailOp : 3;
   1139 
   1140         // dword 1
   1141         uint8_t backfaceStencilWriteMask;
   1142         uint8_t backfaceStencilTestMask;
   1143         uint8_t stencilWriteMask;
   1144         uint8_t stencilTestMask;
   1145 
   1146         // dword 2
   1147         uint8_t backfaceStencilRefValue;
   1148         uint8_t stencilRefValue;
   1149     };
   1150     uint32_t value[3];
   1151 };
   1152 
   1153 enum SWR_SHADING_RATE
   1154 {
   1155     SWR_SHADING_RATE_PIXEL,
   1156     SWR_SHADING_RATE_SAMPLE,
   1157     SWR_SHADING_RATE_COUNT,
   1158 };
   1159 
   1160 enum SWR_INPUT_COVERAGE
   1161 {
   1162     SWR_INPUT_COVERAGE_NONE,
   1163     SWR_INPUT_COVERAGE_NORMAL,
   1164     SWR_INPUT_COVERAGE_INNER_CONSERVATIVE,
   1165     SWR_INPUT_COVERAGE_COUNT,
   1166 };
   1167 
   1168 enum SWR_PS_POSITION_OFFSET
   1169 {
   1170     SWR_PS_POSITION_SAMPLE_NONE,
   1171     SWR_PS_POSITION_SAMPLE_OFFSET,
   1172     SWR_PS_POSITION_CENTROID_OFFSET,
   1173     SWR_PS_POSITION_OFFSET_COUNT,
   1174 };
   1175 
   1176 enum SWR_BARYCENTRICS_MASK
   1177 {
   1178     SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1,
   1179     SWR_BARYCENTRIC_CENTROID_MASK = 0x2,
   1180     SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4,
   1181 };
   1182 
   1183 // pixel shader state
   1184 struct SWR_PS_STATE
   1185 {
   1186     // dword 0-1
   1187     PFN_PIXEL_KERNEL pfnPixelShader;  // @llvm_pfn
   1188 
   1189     // dword 2
   1190     uint32_t killsPixel             : 1;    // pixel shader can kill pixels
   1191     uint32_t inputCoverage          : 2;    // ps uses input coverage
   1192     uint32_t writesODepth           : 1;    // pixel shader writes to depth
   1193     uint32_t usesSourceDepth        : 1;    // pixel shader reads depth
   1194     uint32_t shadingRate            : 2;    // shading per pixel / sample / coarse pixel
   1195     uint32_t posOffset              : 2;    // type of offset (none, sample, centroid) to add to pixel position
   1196     uint32_t barycentricsMask       : 3;    // which type(s) of barycentric coords does the PS interpolate attributes with
   1197     uint32_t usesUAV                : 1;    // pixel shader accesses UAV
   1198     uint32_t forceEarlyZ            : 1;    // force execution of early depth/stencil test
   1199 
   1200     uint8_t renderTargetMask;               // Mask of render targets written
   1201 };
   1202 
   1203 // depth bounds state
   1204 struct SWR_DEPTH_BOUNDS_STATE
   1205 {
   1206     bool    depthBoundsTestEnable;
   1207     float   depthBoundsTestMinValue;
   1208     float   depthBoundsTestMaxValue;
   1209 };
   1210 
   1211