1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file state.h 24 * 25 * @brief Definitions for API state. 26 * 27 ******************************************************************************/ 28 #pragma once 29 30 #include "common/formats.h" 31 #include "common/intrin.h" 32 using gfxptr_t = unsigned long long; 33 #include <functional> 34 #include <algorithm> 35 36 ////////////////////////////////////////////////////////////////////////// 37 /// PRIMITIVE_TOPOLOGY. 
/// @brief Primitive topology identifiers. Every enumerator has a fixed
///        numeric value; 0x15 and 0x18-0x1E are not assigned.
//////////////////////////////////////////////////////////////////////////
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                = 0x0,
    TOP_POINT_LIST             = 0x1,
    TOP_LINE_LIST              = 0x2,
    TOP_LINE_STRIP             = 0x3,
    TOP_TRIANGLE_LIST          = 0x4,
    TOP_TRIANGLE_STRIP         = 0x5,
    TOP_TRIANGLE_FAN           = 0x6,
    TOP_QUAD_LIST              = 0x7,
    TOP_QUAD_STRIP             = 0x8,
    TOP_LINE_LIST_ADJ          = 0x9,
    TOP_LISTSTRIP_ADJ          = 0xA,
    TOP_TRI_LIST_ADJ           = 0xB,
    TOP_TRI_STRIP_ADJ          = 0xC,
    TOP_TRI_STRIP_REVERSE      = 0xD,
    TOP_POLYGON                = 0xE,
    TOP_RECT_LIST              = 0xF,
    TOP_LINE_LOOP              = 0x10,
    TOP_POINT_LIST_BF          = 0x11,
    TOP_LINE_STRIP_CONT        = 0x12,
    TOP_LINE_STRIP_BF          = 0x13,
    TOP_LINE_STRIP_CONT_BF     = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE = 0x16,
    TOP_TRIANGLE_DISC          = 0x17, /// @todo What is this??

    // Patch lists. TOP_PATCHLIST_N is a list of N-vertex patches and is
    // encoded as TOP_PATCHLIST_BASE + N, so the vertex count of a patch can
    // be recovered by subtracting the base.
    TOP_PATCHLIST_BASE = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1    = TOP_PATCHLIST_BASE + 1,  // 0x20, list of 1-vertex patches
    TOP_PATCHLIST_2    = TOP_PATCHLIST_BASE + 2,  // 0x21
    TOP_PATCHLIST_3    = TOP_PATCHLIST_BASE + 3,  // 0x22
    TOP_PATCHLIST_4    = TOP_PATCHLIST_BASE + 4,  // 0x23
    TOP_PATCHLIST_5    = TOP_PATCHLIST_BASE + 5,  // 0x24
    TOP_PATCHLIST_6    = TOP_PATCHLIST_BASE + 6,  // 0x25
    TOP_PATCHLIST_7    = TOP_PATCHLIST_BASE + 7,  // 0x26
    TOP_PATCHLIST_8    = TOP_PATCHLIST_BASE + 8,  // 0x27
    TOP_PATCHLIST_9    = TOP_PATCHLIST_BASE + 9,  // 0x28
    TOP_PATCHLIST_10   = TOP_PATCHLIST_BASE + 10, // 0x29
    TOP_PATCHLIST_11   = TOP_PATCHLIST_BASE + 11, // 0x2A
    TOP_PATCHLIST_12   = TOP_PATCHLIST_BASE + 12, // 0x2B
    TOP_PATCHLIST_13   = TOP_PATCHLIST_BASE + 13, // 0x2C
    TOP_PATCHLIST_14   = TOP_PATCHLIST_BASE + 14, // 0x2D
    TOP_PATCHLIST_15   = TOP_PATCHLIST_BASE + 15, // 0x2E
    TOP_PATCHLIST_16   = TOP_PATCHLIST_BASE + 16, // 0x2F
    TOP_PATCHLIST_17   = TOP_PATCHLIST_BASE + 17, // 0x30
    TOP_PATCHLIST_18   = TOP_PATCHLIST_BASE + 18, // 0x31
    TOP_PATCHLIST_19   = TOP_PATCHLIST_BASE + 19, // 0x32
    TOP_PATCHLIST_20   = TOP_PATCHLIST_BASE + 20, // 0x33
    TOP_PATCHLIST_21   = TOP_PATCHLIST_BASE + 21, // 0x34
    TOP_PATCHLIST_22   = TOP_PATCHLIST_BASE + 22, // 0x35
    TOP_PATCHLIST_23   = TOP_PATCHLIST_BASE + 23, // 0x36
    TOP_PATCHLIST_24   = TOP_PATCHLIST_BASE + 24, // 0x37
    TOP_PATCHLIST_25   = TOP_PATCHLIST_BASE + 25, // 0x38
    TOP_PATCHLIST_26   = TOP_PATCHLIST_BASE + 26, // 0x39
    TOP_PATCHLIST_27   = TOP_PATCHLIST_BASE + 27, // 0x3A
    TOP_PATCHLIST_28   = TOP_PATCHLIST_BASE + 28, // 0x3B
    TOP_PATCHLIST_29   = TOP_PATCHLIST_BASE + 29, // 0x3C
    TOP_PATCHLIST_30   = TOP_PATCHLIST_BASE + 30, // 0x3D
    TOP_PATCHLIST_31   = TOP_PATCHLIST_BASE + 31, // 0x3E
    TOP_PATCHLIST_32   = TOP_PATCHLIST_BASE + 32, // 0x3F, list of 32-vertex patches
};

//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
/// @brief Enumerates the shader types. NUM_SHADER_TYPES is the last
///        enumerator and therefore equals the number of types before it.
//////////////////////////////////////////////////////////////////////////
enum SWR_SHADER_TYPE
{
    SHADER_VERTEX,
    SHADER_GEOMETRY,
    SHADER_DOMAIN,
    SHADER_HULL,
    SHADER_PIXEL,
    SHADER_COMPUTE,

    NUM_SHADER_TYPES, // count of the entries above (6)
};

//////////////////////////////////////////////////////////////////////////
/// SWR_RENDERTARGET_ATTACHMENT
/// @todo It's not clear what an "attachment" means. It's not a common term.
//////////////////////////////////////////////////////////////////////////
enum SWR_RENDERTARGET_ATTACHMENT
{
    SWR_ATTACHMENT_COLOR0,
    SWR_ATTACHMENT_COLOR1,
    SWR_ATTACHMENT_COLOR2,
    SWR_ATTACHMENT_COLOR3,
    SWR_ATTACHMENT_COLOR4,
    SWR_ATTACHMENT_COLOR5,
    SWR_ATTACHMENT_COLOR6,
    SWR_ATTACHMENT_COLOR7,
    SWR_ATTACHMENT_DEPTH,
    SWR_ATTACHMENT_STENCIL,

    SWR_NUM_ATTACHMENTS // count of the entries above (10)
};

// Number of color render targets (COLOR0..COLOR7 above).
#define SWR_NUM_RENDERTARGETS 8

// One bit per SWR_RENDERTARGET_ATTACHMENT entry: each *_BIT value equals
// (1 << corresponding enumerator).
#define SWR_ATTACHMENT_COLOR0_BIT 0x001
#define SWR_ATTACHMENT_COLOR1_BIT 0x002
#define SWR_ATTACHMENT_COLOR2_BIT 0x004
#define SWR_ATTACHMENT_COLOR3_BIT 0x008
#define SWR_ATTACHMENT_COLOR4_BIT 0x010
#define SWR_ATTACHMENT_COLOR5_BIT 0x020
#define SWR_ATTACHMENT_COLOR6_BIT 0x040
#define SWR_ATTACHMENT_COLOR7_BIT 0x080
#define SWR_ATTACHMENT_DEPTH_BIT 0x100
#define SWR_ATTACHMENT_STENCIL_BIT 0x200
#define SWR_ATTACHMENT_MASK_ALL 0x3ff   // all ten attachment bits
#define SWR_ATTACHMENT_MASK_COLOR 0x0ff // the eight color attachment bits


//////////////////////////////////////////////////////////////////////////
/// @brief SWR Inner Tessellation factor ID
/// See above GetTessFactorOutputPosition code for documentation
/// NOTE(review): GetTessFactorOutputPosition is not defined in the visible
/// part of this file; the reference may be stale - confirm.
enum SWR_INNER_TESSFACTOR_ID
{
    SWR_QUAD_U_TRI_INSIDE,
    SWR_QUAD_V_INSIDE,

    SWR_NUM_INNER_TESS_FACTORS, // count of the entries above (2)
};

//////////////////////////////////////////////////////////////////////////
/// @brief SWR Outer Tessellation factor ID
/// See above GetTessFactorOutputPosition code for documentation
/// NOTE(review): GetTessFactorOutputPosition is not defined in the visible
/// part of this file; the reference may be stale - confirm.
enum SWR_OUTER_TESSFACTOR_ID
{
    SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL,
    SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY,
    SWR_QUAD_U_EQ1_TRI_W,
    SWR_QUAD_V_EQ1,

    SWR_NUM_OUTER_TESS_FACTORS, // count of the entries above (4)
};


/////////////////////////////////////////////////////////////////////////
/// simdvertex
/// @brief Defines a vertex element that holds all the data for SIMD vertices.
///        Contains space for position, SGV, and 32 generic attributes
/////////////////////////////////////////////////////////////////////////
enum SWR_VTX_SLOTS
{
    VERTEX_SGV_SLOT = 0,
    // The *_COMP values below share the numbering space with the slot
    // values; presumably they are component indices within the SGV slot.
    VERTEX_SGV_RTAI_COMP = 0,
    VERTEX_SGV_VAI_COMP = 1,
    VERTEX_SGV_POINT_SIZE_COMP = 2,
    VERTEX_POSITION_SLOT = 1,
    VERTEX_POSITION_END_SLOT = 1,
    // Everything below is expressed relative to VERTEX_POSITION_END_SLOT.
    VERTEX_CLIPCULL_DIST_LO_SLOT = (1 + VERTEX_POSITION_END_SLOT), // VS writes lower 4 clip/cull dist
    VERTEX_CLIPCULL_DIST_HI_SLOT = (2 + VERTEX_POSITION_END_SLOT), // VS writes upper 4 clip/cull dist
    VERTEX_ATTRIB_START_SLOT = (3 + VERTEX_POSITION_END_SLOT),     // == 4
    VERTEX_ATTRIB_END_SLOT = (34 + VERTEX_POSITION_END_SLOT),      // == 35, i.e. 32 slots from START to END inclusive
    SWR_VTX_NUM_SLOTS = (1 + VERTEX_ATTRIB_END_SLOT)               // == 36
};

// SoAoSoA
// One simdvector per vertex slot (see SWR_VTX_SLOTS).
struct simdvertex
{
    simdvector attrib[SWR_VTX_NUM_SLOTS];
};

#if ENABLE_AVX512_SIMD16
// Same shape as simdvertex, built from simd16vector.
struct simd16vertex
{
    simd16vector attrib[SWR_VTX_NUM_SLOTS];
};

#endif

// Same shape again, with the vector type taken from SIMD_T::Vec4.
template<typename SIMD_T>
struct SIMDVERTEX_T
{
    typename SIMD_T::Vec4 attrib[SWR_VTX_NUM_SLOTS];
};

//////////////////////////////////////////////////////////////////////////
/// SWR_VS_CONTEXT
/// @brief Input to vertex shader
/////////////////////////////////////////////////////////////////////////
struct SWR_VS_CONTEXT
{
    simdvertex* pVin;           // IN: SIMD input vertex data store
    simdvertex* pVout;          // OUT: SIMD output vertex data store

    uint32_t InstanceID;        // IN: Instance ID, constant across all verts of the SIMD
    simdscalari VertexID;       // IN: Vertex ID
    simdscalari mask;           // IN: Active mask for shader
#if USE_SIMD16_FRONTEND
    uint32_t AlternateOffset;   // IN: amount to offset for interleaving even/odd simd8 in simd16vertex output
#if USE_SIMD16_VS
    simd16scalari mask16;       // IN: Active mask for shader (16-wide)
    simd16scalari VertexID16;   // IN: Vertex ID (16-wide)
#endif
#endif
};

/////////////////////////////////////////////////////////////////////////
/// ScalarCPoint
/// @brief defines a control point element as passed from the output
/// of the hull shader to the input of the domain shader
/////////////////////////////////////////////////////////////////////////
// ScalarAttrib is the per-attribute element type used by ScalarCPoint below.
struct ScalarAttrib
{
    float x;
    float y;
    float z;
    float w;
};

// One ScalarAttrib per vertex slot (see SWR_VTX_SLOTS).
struct ScalarCPoint
{
    ScalarAttrib attrib[SWR_VTX_NUM_SLOTS];
};

//////////////////////////////////////////////////////////////////////////
/// SWR_TESSELLATION_FACTORS
/// @brief Tessellation factors structure (non-vector)
/////////////////////////////////////////////////////////////////////////
struct SWR_TESSELLATION_FACTORS
{
    float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; // indexed by SWR_OUTER_TESSFACTOR_ID
    float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; // indexed by SWR_INNER_TESSFACTOR_ID
};

#define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches
// A scalar patch: tessellation factors, up to MAX_NUM_VERTS_PER_PRIM control
// points, and one additional ScalarCPoint of patch data.
struct ScalarPatch
{
    SWR_TESSELLATION_FACTORS tessFactors;
    ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM];
    ScalarCPoint patchData;
};

//////////////////////////////////////////////////////////////////////////
/// SWR_HS_CONTEXT
/// @brief Input to hull shader
/////////////////////////////////////////////////////////////////////////
struct SWR_HS_CONTEXT
{
    simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data
    simdscalari PrimitiveID;                 // IN: (SIMD) primitive ID generated from the draw call
    simdscalari mask;                        // IN: Active mask for shader
    ScalarPatch* pCPout;                     // OUT: Output control point patch
                                             // SIMD-sized-array of SCALAR patches
};

//////////////////////////////////////////////////////////////////////////
/// SWR_DS_CONTEXT
/// @brief Input to domain shader
/////////////////////////////////////////////////////////////////////////
struct SWR_DS_CONTEXT
{
    uint32_t PrimitiveID;           // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation
    uint32_t vectorOffset;          // IN: (SCALAR) vector index offset into SIMD data.
    uint32_t vectorStride;          // IN: (SCALAR) stride (in vectors) of output data per attribute-component
    uint32_t outVertexAttribOffset; // IN: (SCALAR) Offset to the attributes as processed by the next shader stage.
    ScalarPatch* pCpIn;             // IN: (SCALAR) Control patch
    simdscalar* pDomainU;           // IN: (SIMD) Domain Point U coords
    simdscalar* pDomainV;           // IN: (SIMD) Domain Point V coords
    simdscalari mask;               // IN: Active mask for shader
    simdscalar* pOutputData;        // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component)
};

//////////////////////////////////////////////////////////////////////////
/// SWR_GS_CONTEXT
/// @brief Input to geometry shader.
/////////////////////////////////////////////////////////////////////////
struct SWR_GS_CONTEXT
{
    simdvector* pVerts;                 // IN: input primitive data for SIMD prims
    uint32_t inputVertStride;           // IN: input vertex stride, in attributes
    simdscalari PrimitiveID;            // IN: input primitive ID generated from the draw call
    uint32_t InstanceID;                // IN: input instance ID
    simdscalari mask;                   // IN: Active mask for shader
    uint8_t* pStreams[KNOB_SIMD_WIDTH]; // OUT: output stream (contains vertices for all output streams)
};

// Groups the four positions tracked for each SIMD of pixels; used below for
// x/y locations, barycentrics and 1/w in SWR_PS_CONTEXT.
// (UL presumably means "upper left" - confirm.)
struct PixelPositions
{
    simdscalar UL;
    simdscalar center;
    simdscalar sample;
    simdscalar centroid;
};

#define SWR_MAX_NUM_MULTISAMPLES 16

//////////////////////////////////////////////////////////////////////////
/// SWR_PS_CONTEXT
/// @brief Input to pixel shader.
/////////////////////////////////////////////////////////////////////////
struct SWR_PS_CONTEXT
{
    PixelPositions vX;          // IN: x location(s) of pixels
    PixelPositions vY;          // IN: y location(s) of pixels
    simdscalar vZ;              // INOUT: z location of pixels
    simdscalari activeMask;     // OUT: mask for kill
    simdscalar inputMask;       // IN: input coverage mask for all samples
    simdscalari oMask;          // OUT: mask for output coverage

    PixelPositions vI;          // barycentric coords evaluated at pixel center, sample position, centroid
    PixelPositions vJ;
    PixelPositions vOneOverW;   // IN: 1/w

    const float* pAttribs;      // IN: pointer to attribute barycentric coefficients
    const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients
    const float* pRecipW;       // IN: pointer to 1/w coord for each vertex
    const float *I;             // IN: Barycentric A, B, and C coefs used to compute I
    const float *J;             // IN: Barycentric A, B, and C coefs used to compute J
    float recipDet;             // IN: 1/Det, used when barycentric interpolating attributes
    const float* pSamplePosX;   // IN: array of sample positions
    const float* pSamplePosY;   // IN: array of sample positions
    simdvector shaded[SWR_NUM_RENDERTARGETS];
                                // OUT: result color per rendertarget

    uint32_t frontFace;                 // IN: front- 1, back- 0
    uint32_t sampleIndex;               // IN: sampleIndex
    uint32_t renderTargetArrayIndex;    // IN: render target array index from GS
    uint32_t rasterizerSampleCount;     // IN: sample count used by the rasterizer

    uint8_t* pColorBuffer[SWR_NUM_RENDERTARGETS]; // IN: Pointers to render target hottiles
};

//////////////////////////////////////////////////////////////////////////
/// SWR_CS_CONTEXT
/// @brief Input to compute shader.
/////////////////////////////////////////////////////////////////////////
struct SWR_CS_CONTEXT
{
    // The ThreadGroupId is the current thread group index relative
    // to all thread groups in the Dispatch call. The ThreadId, ThreadIdInGroup,
    // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader.

    // Compute shader accepts the following system values.
    // o ThreadId - Current thread id relative to all other threads in dispatch.
    // o ThreadGroupId - Current thread group id relative to all other groups in dispatch.
    // o ThreadIdInGroup - Current thread relative to all threads in the current thread group.
    // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup.
    //
    // All of these system values can be computed in the shader. They will be
    // derived from the current tile counter. The tile counter is an atomic counter that
    // resides in the draw context and is initialized to the product of the dispatch dims.
    //
    // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z
    //
    // Each CPU worker thread will atomically decrement this counter and pass the current
    // count into the shader. When the count reaches 0 then all thread groups in the
    // dispatch call have been completed.

    uint32_t tileCounter;   // The tile counter value for this thread group.

    // Dispatch dimensions used by shader to compute system values from the tile counter.
    uint32_t dispatchDims[3];

    uint8_t* pTGSM;             // Thread Group Shared Memory pointer.

    uint8_t* pSpillFillBuffer;  // Spill/fill buffer for barrier support

    uint8_t* pScratchSpace;     // Pointer to scratch space buffer used by the shader, shader is responsible
                                // for subdividing scratch space per instance/simd

    uint32_t scratchSpacePerSimd; // Scratch space per work item x SIMD_WIDTH
};

// enums
enum SWR_TILE_MODE
{
    SWR_TILE_NONE = 0x0,    // Linear mode (no tiling)
    SWR_TILE_MODE_WMAJOR,   // W major tiling
    SWR_TILE_MODE_XMAJOR,   // X major tiling
    SWR_TILE_MODE_YMAJOR,   // Y major tiling
    SWR_TILE_SWRZ,          // SWR-Z tiling

    SWR_TILE_MODE_COUNT     // count of the entries above (5)
};

// Surface kinds. Values are explicit; note that 6 is not assigned.
enum SWR_SURFACE_TYPE
{
    SURFACE_1D = 0,
    SURFACE_2D = 1,
    SURFACE_3D = 2,
    SURFACE_CUBE = 3,
    SURFACE_BUFFER = 4,
    SURFACE_STRUCTURED_BUFFER = 5,
    SURFACE_NULL = 7
};

// Depth comparison functions; NUM_ZFUNC is the count of the entries before it.
enum SWR_ZFUNCTION
{
    ZFUNC_ALWAYS,
    ZFUNC_NEVER,
    ZFUNC_LT,
    ZFUNC_EQ,
    ZFUNC_LE,
    ZFUNC_GT,
    ZFUNC_NE,
    ZFUNC_GE,
    NUM_ZFUNC
};

// Stencil operations.
enum SWR_STENCILOP
{
    STENCILOP_KEEP,
    STENCILOP_ZERO,
    STENCILOP_REPLACE,
    STENCILOP_INCRSAT,
    STENCILOP_DECRSAT,
    STENCILOP_INCR,
    STENCILOP_DECR,
    STENCILOP_INVERT
};

// Blend factors. The enumerator order defines the numeric values, so
// entries must not be reordered.
enum SWR_BLEND_FACTOR
{
    BLENDFACTOR_ONE,
    BLENDFACTOR_SRC_COLOR,
    BLENDFACTOR_SRC_ALPHA,
    BLENDFACTOR_DST_ALPHA,
    BLENDFACTOR_DST_COLOR,
    BLENDFACTOR_SRC_ALPHA_SATURATE,
    BLENDFACTOR_CONST_COLOR,
    BLENDFACTOR_CONST_ALPHA,
    BLENDFACTOR_SRC1_COLOR,
    BLENDFACTOR_SRC1_ALPHA,
    BLENDFACTOR_ZERO,
    BLENDFACTOR_INV_SRC_COLOR,
    BLENDFACTOR_INV_SRC_ALPHA,
    BLENDFACTOR_INV_DST_ALPHA,
    BLENDFACTOR_INV_DST_COLOR,
    BLENDFACTOR_INV_CONST_COLOR,
    BLENDFACTOR_INV_CONST_ALPHA,
    BLENDFACTOR_INV_SRC1_COLOR,
    BLENDFACTOR_INV_SRC1_ALPHA
};

// Blend operations.
enum SWR_BLEND_OP
{
    BLENDOP_ADD,
    BLENDOP_SUBTRACT,
    BLENDOP_REVSUBTRACT,
    BLENDOP_MIN,
    BLENDOP_MAX,
};

// Logic operations (16 entries, values 0..15 in declaration order).
enum SWR_LOGIC_OP
{
    LOGICOP_CLEAR,
    LOGICOP_NOR,
    LOGICOP_AND_INVERTED,
    LOGICOP_COPY_INVERTED,
    LOGICOP_AND_REVERSE,
    LOGICOP_INVERT,
    LOGICOP_XOR,
    LOGICOP_NAND,
    LOGICOP_AND,
    LOGICOP_EQUIV,
    LOGICOP_NOOP,
    LOGICOP_OR_INVERTED,
    LOGICOP_COPY,
    LOGICOP_OR_REVERSE,
    LOGICOP_OR,
    LOGICOP_SET,
};

//////////////////////////////////////////////////////////////////////////
/// SWR_AUX_MODE
/// @brief Specifies how the auxiliary buffer is used by the driver.
//////////////////////////////////////////////////////////////////////////
enum SWR_AUX_MODE
{
    AUX_MODE_NONE,
    AUX_MODE_COLOR,
    AUX_MODE_UAV,
    AUX_MODE_DEPTH,
};

//////////////////////////////////////////////////////////////////////////
/// SWR_SURFACE_STATE
/// @brief Describes a surface: base address, type/format, dimensions,
///        sampling LOD range, tiling and auxiliary buffer.
/// @note Fields tagged "@llvm_enum" carry a marker that is presumably read
///       by an external code generator; keep the tags and field order intact.
//////////////////////////////////////////////////////////////////////////
struct SWR_SURFACE_STATE
{
    gfxptr_t xpBaseAddress;
    SWR_SURFACE_TYPE type;  // @llvm_enum
    SWR_FORMAT format;      // @llvm_enum
    uint32_t width;
    uint32_t height;
    uint32_t depth;
    uint32_t numSamples;
    uint32_t samplePattern;
    uint32_t pitch;
    uint32_t qpitch;
    uint32_t minLod;            // for sampled surfaces, the most detailed LOD that can be accessed by sampler
    uint32_t maxLod;            // for sampled surfaces, the max LOD that can be accessed
    float resourceMinLod;       // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler
    uint32_t lod;               // for render targets, the lod being rendered to
    uint32_t arrayIndex;        // for render targets, the array index being rendered to for arrayed surfaces
    SWR_TILE_MODE tileMode;     // @llvm_enum
    uint32_t halign;
    uint32_t valign;
    uint32_t xOffset;
    uint32_t yOffset;

    uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces

    gfxptr_t xpAuxBaseAddress;  // Used for compression, append/consume counter, etc.
    SWR_AUX_MODE auxMode;       // @llvm_enum


    bool bInterleavedSamples;   // are MSAA samples stored interleaved or planar
};

// vertex fetch state
// WARNING- any changes to this struct need to be reflected
// in the fetch shader jit
struct SWR_VERTEX_BUFFER_STATE
{
    uint32_t index;
    uint32_t pitch;
    const uint8_t *pData;
    uint32_t size;
    uint32_t numaNode;
    uint32_t minVertex;             // min vertex (for bounds checking)
    uint32_t maxVertex;             // size / pitch.  precalculated value used by fetch shader for OOB checks
    uint32_t partialInboundsSize;   // size % pitch.  precalculated value used by fetch shader for partially OOB vertices
};

// Index buffer binding: element format, data pointer and size.
struct SWR_INDEX_BUFFER_STATE
{
    // Format type for indices (e.g. UINT16, UINT32, etc.)
    SWR_FORMAT format; // @llvm_enum
    const void *pIndices;
    uint32_t size;
};


//////////////////////////////////////////////////////////////////////////
/// SWR_FETCH_CONTEXT
/// @brief Input to fetch shader.
/// @note WARNING - Changes to this struct need to be reflected in the
///       fetch shader jit.
/////////////////////////////////////////////////////////////////////////
struct SWR_FETCH_CONTEXT
{
    const SWR_VERTEX_BUFFER_STATE* pStreams;    // IN: array of bound vertex buffers
    const int32_t* pIndices;                    // IN: pointer to index buffer for indexed draws
    const int32_t* pLastIndex;                  // IN: pointer to end of index buffer, used for bounds checking
    uint32_t CurInstance;                       // IN: current instance
    uint32_t BaseVertex;                        // IN: base vertex
    uint32_t StartVertex;                       // IN: start vertex
    uint32_t StartInstance;                     // IN: start instance
    simdscalari VertexID;                       // OUT: vector of vertex IDs
    simdscalari CutMask;                        // OUT: vector mask of indices which have the cut index value
#if USE_SIMD16_SHADERS
//    simd16scalari VertexID;                     // OUT: vector of vertex IDs
//    simd16scalari CutMask;                      // OUT: vector mask of indices which have the cut index value
    // A second pair of 8-wide outputs is declared here in place of the
    // commented-out 16-wide members above.
    simdscalari VertexID2;                      // OUT: vector of vertex IDs
    simdscalari CutMask2;                       // OUT: vector mask of indices which have the cut index value
#endif
};

//////////////////////////////////////////////////////////////////////////
/// SWR_STATS
///
/// @brief All statistics generated by SWR go here.  These are public
///        to driver.
/// @note OSALIGNLINE is defined outside this file (presumably an
///       alignment attribute wrapper - confirm).
/////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS
{
    // Occlusion Query
    uint64_t DepthPassCount; // Number of passing depth tests. Not exact.

    // Pipeline Stats
    uint64_t PsInvocations;  // Number of Pixel Shader invocations
    uint64_t CsInvocations;  // Number of Compute Shader invocations

};

//////////////////////////////////////////////////////////////////////////
/// SWR_STATS_FE
///
/// @brief All statistics generated by FE.
/// @note The struct declared below is SWR_STATS_FE.
/////////////////////////////////////////////////////////////////////////
OSALIGNLINE(struct) SWR_STATS_FE
{
    uint64_t IaVertices;    // Number of Fetch Shader vertices
    uint64_t IaPrimitives;  // Number of PA primitives.
    uint64_t VsInvocations; // Number of Vertex Shader invocations
    uint64_t HsInvocations; // Number of Hull Shader invocations
    uint64_t DsInvocations; // Number of Domain Shader invocations
    uint64_t GsInvocations; // Number of Geometry Shader invocations
    uint64_t GsPrimitives;  // Number of prims GS outputs.
    uint64_t CInvocations;  // Number of clipper invocations
    uint64_t CPrimitives;   // Number of clipper primitives.

    // Streamout Stats
    // NOTE(review): the literal 4 matches MAX_SO_STREAMS defined just below;
    // presumably one entry per stream - confirm.
    uint64_t SoPrimStorageNeeded[4];
    uint64_t SoNumPrimsWritten[4];
};

//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_BUFFERS
/////////////////////////////////////////////////////////////////////////

#define MAX_SO_STREAMS 4
#define MAX_SO_BUFFERS 4
#define MAX_ATTRIBUTES 32

// Describes one stream-out buffer. Sizes, pitch and offsets are in dwords.
struct SWR_STREAMOUT_BUFFER
{
    bool enable;
    bool soWriteEnable;

    // Pointers to streamout buffers.
    uint32_t* pBuffer;

    // Size of buffer in dwords.
    uint32_t bufferSize;

    // Vertex pitch of buffer in dwords.
    uint32_t pitch;

    // Offset into buffer in dwords. SOS will increment this offset.
    uint32_t streamOffset;

    // Offset to the SO write offset. If not null then we update offset here.
    uint32_t* pWriteOffset;

};

//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_STATE
{
    // This disables stream output.
    // NOTE(review): the field name suggests that true *enables* stream
    // output; confirm which polarity is intended.
    bool soEnable;

    // which streams are enabled for streamout
    bool streamEnable[MAX_SO_STREAMS];

    // If set then do not send any streams to the rasterizer.
    bool rasterizerDisable;

    // Specifies which stream to send to the rasterizer.
    uint32_t streamToRasterizer;

    // The stream masks specify which attributes are sent to which streams.
    // These masks help the FE to setup the pPrimData buffer that is passed
    // the Stream Output Shader (SOS) function.
    uint32_t streamMasks[MAX_SO_STREAMS];

    // Number of attributes, including position, per vertex that are streamed out.
    // This should match number of bits in stream mask.
    uint32_t streamNumEntries[MAX_SO_STREAMS];

    // Offset to the start of the attributes of the input vertices, in simdvector units
    uint32_t vertexAttribOffset[MAX_SO_STREAMS];
};

//////////////////////////////////////////////////////////////////////////
/// STREAMOUT_CONTEXT - Passed to SOS
/////////////////////////////////////////////////////////////////////////
struct SWR_STREAMOUT_CONTEXT
{
    uint32_t* pPrimData;
    // NOTE(review): this array is sized by MAX_SO_STREAMS although it holds
    // buffer pointers and MAX_SO_BUFFERS also exists (both are 4) - confirm.
    SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS];

    // Num prims written for this stream
    uint32_t numPrimsWritten;

    // Num prims that should have been written if there were no overflow.
    uint32_t numPrimStorageNeeded;
};

//////////////////////////////////////////////////////////////////////////
/// SWR_GS_STATE - Geometry shader state
/////////////////////////////////////////////////////////////////////////
struct SWR_GS_STATE
{
    bool gsEnable;

    // Number of input attributes per vertex. Used by the frontend to
    // optimize assembling primitives for GS
    uint32_t numInputAttribs;

    // Stride of incoming verts in attributes
    uint32_t inputVertStride;

    // Output topology - can be point, tristrip, or linestrip
    PRIMITIVE_TOPOLOGY outputTopology;      // @llvm_enum

    // Maximum number of verts that can be emitted by a single instance of the GS
    uint32_t maxNumVerts;

    // Instance count
    uint32_t instanceCount;

    // If true, geometry shader emits a single stream, with separate cut buffer.
    // If false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer
    // to map vertices to streams
    bool isSingleStream;

    // When single stream is enabled, singleStreamID dictates which stream is being output.
    // field ignored if isSingleStream is false
    uint32_t singleStreamID;

    // Total amount of memory to allocate for one instance of the shader output in bytes
    uint32_t allocationSize;

    // Offset to the start of the attributes of the input vertices, in simdvector units, as read by the GS
    uint32_t vertexAttribOffset;

    // Offset to the attributes as stored by the preceding shader stage.
    uint32_t srcVertexAttribOffset;

    // Size of the control data section which contains cut or streamID data, in simdscalar units. Should be sized to handle
    // the maximum number of verts output by the GS. Can be 0 if there are no cuts or streamID bits.
    uint32_t controlDataSize;

    // Offset to the control data section, in bytes
    uint32_t controlDataOffset;

    // Total size of an output vertex, in simdvector units
    uint32_t outputVertexSize;

    // Offset to the start of the vertex section, in bytes
    uint32_t outputVertexOffset;

    // Set this to non-zero to indicate that the shader outputs a static number of verts. If zero, shader is
    // expected to store the final vertex count in the first dword of the gs output stream.
    uint32_t staticVertexCount;
};


//////////////////////////////////////////////////////////////////////////
/// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_OUTPUT_TOPOLOGY
{
    SWR_TS_OUTPUT_POINT,
    SWR_TS_OUTPUT_LINE,
    SWR_TS_OUTPUT_TRI_CW,
    SWR_TS_OUTPUT_TRI_CCW,

    SWR_TS_OUTPUT_TOPOLOGY_COUNT // count of the entries above (4)
};

//////////////////////////////////////////////////////////////////////////
/// SWR_TS_PARTITIONING - Defines tessellation algorithm
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_PARTITIONING
{
    SWR_TS_INTEGER,
    SWR_TS_ODD_FRACTIONAL,
    SWR_TS_EVEN_FRACTIONAL,

    SWR_TS_PARTITIONING_COUNT // count of the entries above (3)
};

//////////////////////////////////////////////////////////////////////////
/// SWR_TS_DOMAIN - Defines Tessellation Domain
/////////////////////////////////////////////////////////////////////////
enum SWR_TS_DOMAIN
{
    SWR_TS_QUAD,
    SWR_TS_TRI,
    SWR_TS_ISOLINE,

    SWR_TS_DOMAIN_COUNT // count of the entries above (3)
};

//////////////////////////////////////////////////////////////////////////
/// SWR_TS_STATE - Tessellation state
/////////////////////////////////////////////////////////////////////////
struct SWR_TS_STATE
{
    bool tsEnable;
    SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology;    // @llvm_enum
    SWR_TS_PARTITIONING partitioning;           // @llvm_enum
    SWR_TS_DOMAIN domain;                       // @llvm_enum

    PRIMITIVE_TOPOLOGY postDSTopology;          // @llvm_enum

    uint32_t numHsInputAttribs;
    uint32_t numHsOutputAttribs;
    uint32_t numDsOutputAttribs;
    uint32_t dsAllocationSize;
    uint32_t dsOutVtxAttribOffset;

    // Offset to the start of the attributes of the input vertices, in simdvector units
    uint32_t vertexAttribOffset;
};

// output merger state
// Four single-bit per-channel write-disable flags packed into one byte
// (the size is enforced by the static_assert that follows).
struct SWR_RENDER_TARGET_BLEND_STATE
{
    uint8_t writeDisableRed : 1;
    uint8_t writeDisableGreen : 1;
    uint8_t writeDisableBlue : 1;
    uint8_t writeDisableAlpha : 1;
};
static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size");

// Multisample modes. The enumerator value is an index into the lookup table
// in GetNumSamples(), not the sample count itself.
enum SWR_MULTISAMPLE_COUNT
{
    SWR_MULTISAMPLE_1X = 0,
    SWR_MULTISAMPLE_2X,
    SWR_MULTISAMPLE_4X,
    SWR_MULTISAMPLE_8X,
    SWR_MULTISAMPLE_16X,
    SWR_MULTISAMPLE_TYPE_COUNT
};

/// @brief Converts a SWR_MULTISAMPLE_COUNT enumerator to its sample count.
/// @param sampleCount multisample mode; must be below SWR_MULTISAMPLE_TYPE_COUNT.
/// @return 1, 2, 4, 8 or 16 for the 1X..16X modes respectively.
/// @note The range check is an assert only, so an out-of-range value is not
///       rejected in builds where assert is compiled out.
INLINE uint32_t GetNumSamples(SWR_MULTISAMPLE_COUNT sampleCount) // @llvm_func_start
{
    static const uint32_t sampleCountLUT[SWR_MULTISAMPLE_TYPE_COUNT] {1, 2, 4, 8, 16};
    assert(sampleCount < SWR_MULTISAMPLE_TYPE_COUNT);
    return sampleCountLUT[sampleCount];
} // @llvm_func_end

// Blend state shared by all render targets plus the per-target write-disable
// flags. The total size is pinned to 36 bytes by the static_assert below.
struct SWR_BLEND_STATE
{
    // constant blend factor color in RGBA float
    float constantColor[4];

    // alpha test reference value in unorm8 or float32
    uint32_t alphaTestReference;
    uint32_t sampleMask;
    // all RT's have the same sample count
    ///@todo move this to Output Merger state when we refactor
    SWR_MULTISAMPLE_COUNT sampleCount;  // @llvm_enum

    SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS];
};
static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size");

//////////////////////////////////////////////////////////////////////////
/// FUNCTION POINTERS FOR SHADERS
/// The fetch function writes a simd16vertex when USE_SIMD16_SHADERS is set
/// and a simdvertex otherwise. PFN_PIXEL_KERNEL and PFN_CPIXEL_KERNEL have
/// identical signatures.

#if USE_SIMD16_SHADERS
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simd16vertex& out);
#else
typedef void(__cdecl *PFN_FETCH_FUNC)(HANDLE hPrivateData, SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out);
#endif
typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext);
typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext);
typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext);
typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext);
typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext);
typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext);
typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext);
typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*,
    simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample,
    uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask);
typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar const &);



//////////////////////////////////////////////////////////////////////////
/// FRONTEND_STATE
/////////////////////////////////////////////////////////////////////////
struct SWR_FRONTEND_STATE
{
    // skip clip test, perspective divide, and viewport transform
    // intended for verts in screen space
    bool vpTransformDisable;
    bool bEnableCutIndex;
    // Per-topology provoking vertex selection packed into bit-fields;
    // 'bits' aliases the whole word so all fields can be accessed at once.
    union
    {
        struct
        {
            uint32_t triFan : 2;
            uint32_t lineStripList : 1;
            uint32_t triStripList : 2;
        };
        uint32_t bits;
    } provokingVertex;
    uint32_t topologyProvokingVertex; // provoking vertex for the draw topology

    // Size of a vertex in simdvector units. Should be sized to the
    // maximum of the input/output of the vertex shader.
    uint32_t vsVertexSize;
};

//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRIX
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRIX
{
    float m00;
    float m11;
    float m22;
    float m30;
    float m31;
    float m32;
};

//////////////////////////////////////////////////////////////////////////
/// VIEWPORT_MATRICES
/// Same six coefficients as SWR_VIEWPORT_MATRIX, each stored as an array
/// with KNOB_NUM_VIEWPORTS_SCISSORS entries.
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT_MATRICES
{
    float m00[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m11[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m22[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m30[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m31[KNOB_NUM_VIEWPORTS_SCISSORS];
    float m32[KNOB_NUM_VIEWPORTS_SCISSORS];
};

//////////////////////////////////////////////////////////////////////////
/// SWR_VIEWPORT
/////////////////////////////////////////////////////////////////////////
struct SWR_VIEWPORT
{
    float x;
    float y;
    float width;
    float height;
    float minZ;
    float maxZ;
};

//////////////////////////////////////////////////////////////////////////
/// SWR_CULLMODE
//////////////////////////////////////////////////////////////////////////
enum SWR_CULLMODE
{
    SWR_CULLMODE_BOTH,
    SWR_CULLMODE_NONE,
    SWR_CULLMODE_FRONT,
    SWR_CULLMODE_BACK
};

enum SWR_FILLMODE
{
    SWR_FILLMODE_POINT,
    SWR_FILLMODE_WIREFRAME,
    SWR_FILLMODE_SOLID
};

enum SWR_FRONTWINDING
{
    SWR_FRONTWINDING_CW,
    SWR_FRONTWINDING_CCW
};


enum SWR_PIXEL_LOCATION
{
    SWR_PIXEL_LOCATION_CENTER,
    SWR_PIXEL_LOCATION_UL,
};

// fixed point screen space sample locations within a pixel
struct SWR_MULTISAMPLE_POS
{
public:
    INLINE void SetXi(uint32_t sampleNum, uint32_t val) { _xi[sampleNum] = val; }; // @llvm_func
1000 INLINE void SetYi(uint32_t sampleNum, uint32_t val) { _yi[sampleNum] = val; }; // @llvm_func 1001 INLINE uint32_t Xi(uint32_t sampleNum) const { return _xi[sampleNum]; }; // @llvm_func 1002 INLINE uint32_t Yi(uint32_t sampleNum) const { return _yi[sampleNum]; }; // @llvm_func 1003 INLINE void SetX(uint32_t sampleNum, float val) { _x[sampleNum] = val; }; // @llvm_func 1004 INLINE void SetY(uint32_t sampleNum, float val) { _y[sampleNum] = val; }; // @llvm_func 1005 INLINE float X(uint32_t sampleNum) const { return _x[sampleNum]; }; // @llvm_func 1006 INLINE float Y(uint32_t sampleNum) const { return _y[sampleNum]; }; // @llvm_func 1007 typedef const float(&sampleArrayT)[SWR_MAX_NUM_MULTISAMPLES]; //@llvm_typedef 1008 INLINE sampleArrayT X() const { return _x; }; // @llvm_func 1009 INLINE sampleArrayT Y() const { return _y; }; // @llvm_func 1010 INLINE const __m128i& vXi(uint32_t sampleNum) const { return _vXi[sampleNum]; }; // @llvm_func 1011 INLINE const __m128i& vYi(uint32_t sampleNum) const { return _vYi[sampleNum]; }; // @llvm_func 1012 INLINE const simdscalar& vX(uint32_t sampleNum) const { return _vX[sampleNum]; }; // @llvm_func 1013 INLINE const simdscalar& vY(uint32_t sampleNum) const { return _vY[sampleNum]; }; // @llvm_func 1014 INLINE const __m128i& TileSampleOffsetsX() const { return tileSampleOffsetsX; }; // @llvm_func 1015 INLINE const __m128i& TileSampleOffsetsY() const { return tileSampleOffsetsY; }; // @llvm_func 1016 1017 INLINE void PrecalcSampleData(int numSamples); //@llvm_func 1018 1019 private: 1020 template <typename MaskT> 1021 INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max); // @llvm_func 1022 INLINE void CalcTileSampleOffsets(int numSamples); // @llvm_func 1023 1024 // scalar sample values 1025 uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES]; 1026 uint32_t _yi[SWR_MAX_NUM_MULTISAMPLES]; 1027 float _x[SWR_MAX_NUM_MULTISAMPLES]; 1028 float _y[SWR_MAX_NUM_MULTISAMPLES]; 1029 1030 // precalc'd / vectorized samples 1031 __m128i 
_vXi[SWR_MAX_NUM_MULTISAMPLES]; 1032 __m128i _vYi[SWR_MAX_NUM_MULTISAMPLES]; 1033 simdscalar _vX[SWR_MAX_NUM_MULTISAMPLES]; 1034 simdscalar _vY[SWR_MAX_NUM_MULTISAMPLES]; 1035 __m128i tileSampleOffsetsX; 1036 __m128i tileSampleOffsetsY; 1037 }; 1038 1039 ////////////////////////////////////////////////////////////////////////// 1040 /// SWR_RASTSTATE 1041 ////////////////////////////////////////////////////////////////////////// 1042 struct SWR_RASTSTATE 1043 { 1044 uint32_t cullMode : 2; 1045 uint32_t fillMode : 2; 1046 uint32_t frontWinding : 1; 1047 uint32_t scissorEnable : 1; 1048 uint32_t depthClipEnable : 1; 1049 uint32_t clipHalfZ : 1; 1050 uint32_t pointParam : 1; 1051 uint32_t pointSpriteEnable : 1; 1052 uint32_t pointSpriteTopOrigin : 1; 1053 uint32_t forcedSampleCount : 1; 1054 uint32_t pixelOffset : 1; 1055 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units 1056 uint32_t conservativeRast : 1; 1057 1058 float pointSize; 1059 float lineWidth; 1060 1061 float depthBias; 1062 float slopeScaledDepthBias; 1063 float depthBiasClamp; 1064 SWR_FORMAT depthFormat; // @llvm_enum 1065 1066 // sample count the rasterizer is running at 1067 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum 1068 uint32_t pixelLocation; // UL or Center 1069 SWR_MULTISAMPLE_POS samplePositions; // @llvm_struct 1070 bool bIsCenterPattern; // @llvm_enum 1071 }; 1072 1073 1074 enum SWR_CONSTANT_SOURCE 1075 { 1076 SWR_CONSTANT_SOURCE_CONST_0000, 1077 SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, 1078 SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, 1079 SWR_CONSTANT_SOURCE_PRIM_ID 1080 }; 1081 1082 struct SWR_ATTRIB_SWIZZLE 1083 { 1084 uint16_t sourceAttrib : 5; // source attribute 1085 uint16_t constantSource : 2; // constant source to apply 1086 uint16_t componentOverrideMask : 4; // override component with constant source 1087 }; 1088 1089 // backend state 1090 struct SWR_BACKEND_STATE 1091 { 1092 uint32_t constantInterpolationMask; // bitmask indicating 
which attributes have constant interpolation 1093 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates 1094 1095 uint8_t numAttributes; // total number of attributes to send to backend (up to 32) 1096 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components 1097 1098 bool swizzleEnable; // when enabled, core will parse the swizzle map when 1099 // setting up attributes for the backend, otherwise 1100 // all attributes up to numAttributes will be sent 1101 SWR_ATTRIB_SWIZZLE swizzleMap[32]; 1102 1103 bool readRenderTargetArrayIndex; // Forward render target array index from last FE stage to the backend 1104 bool readViewportArrayIndex; // Read viewport array index from last FE stage during binning 1105 1106 // Offset to the start of the attributes of the input vertices, in simdvector units 1107 uint32_t vertexAttribOffset; 1108 1109 // User clip/cull distance enables 1110 uint8_t cullDistanceMask; 1111 uint8_t clipDistanceMask; 1112 1113 // Offset to clip/cull attrib section of the vertex, in simdvector units 1114 uint32_t vertexClipCullOffset; 1115 }; 1116 1117 1118 union SWR_DEPTH_STENCIL_STATE 1119 { 1120 struct 1121 { 1122 // dword 0 1123 uint32_t depthWriteEnable : 1; 1124 uint32_t depthTestEnable : 1; 1125 uint32_t stencilWriteEnable : 1; 1126 uint32_t stencilTestEnable : 1; 1127 uint32_t doubleSidedStencilTestEnable : 1; 1128 1129 uint32_t depthTestFunc : 3; 1130 uint32_t stencilTestFunc : 3; 1131 1132 uint32_t backfaceStencilPassDepthPassOp : 3; 1133 uint32_t backfaceStencilPassDepthFailOp : 3; 1134 uint32_t backfaceStencilFailOp : 3; 1135 uint32_t backfaceStencilTestFunc : 3; 1136 uint32_t stencilPassDepthPassOp : 3; 1137 uint32_t stencilPassDepthFailOp : 3; 1138 uint32_t stencilFailOp : 3; 1139 1140 // dword 1 1141 uint8_t backfaceStencilWriteMask; 1142 uint8_t backfaceStencilTestMask; 1143 uint8_t stencilWriteMask; 
1144 uint8_t stencilTestMask; 1145 1146 // dword 2 1147 uint8_t backfaceStencilRefValue; 1148 uint8_t stencilRefValue; 1149 }; 1150 uint32_t value[3]; 1151 }; 1152 1153 enum SWR_SHADING_RATE 1154 { 1155 SWR_SHADING_RATE_PIXEL, 1156 SWR_SHADING_RATE_SAMPLE, 1157 SWR_SHADING_RATE_COUNT, 1158 }; 1159 1160 enum SWR_INPUT_COVERAGE 1161 { 1162 SWR_INPUT_COVERAGE_NONE, 1163 SWR_INPUT_COVERAGE_NORMAL, 1164 SWR_INPUT_COVERAGE_INNER_CONSERVATIVE, 1165 SWR_INPUT_COVERAGE_COUNT, 1166 }; 1167 1168 enum SWR_PS_POSITION_OFFSET 1169 { 1170 SWR_PS_POSITION_SAMPLE_NONE, 1171 SWR_PS_POSITION_SAMPLE_OFFSET, 1172 SWR_PS_POSITION_CENTROID_OFFSET, 1173 SWR_PS_POSITION_OFFSET_COUNT, 1174 }; 1175 1176 enum SWR_BARYCENTRICS_MASK 1177 { 1178 SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1, 1179 SWR_BARYCENTRIC_CENTROID_MASK = 0x2, 1180 SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4, 1181 }; 1182 1183 // pixel shader state 1184 struct SWR_PS_STATE 1185 { 1186 // dword 0-1 1187 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn 1188 1189 // dword 2 1190 uint32_t killsPixel : 1; // pixel shader can kill pixels 1191 uint32_t inputCoverage : 2; // ps uses input coverage 1192 uint32_t writesODepth : 1; // pixel shader writes to depth 1193 uint32_t usesSourceDepth : 1; // pixel shader reads depth 1194 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel 1195 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position 1196 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with 1197 uint32_t usesUAV : 1; // pixel shader accesses UAV 1198 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test 1199 1200 uint8_t renderTargetMask; // Mask of render targets written 1201 }; 1202 1203 // depth bounds state 1204 struct SWR_DEPTH_BOUNDS_STATE 1205 { 1206 bool depthBoundsTestEnable; 1207 float depthBoundsTestMinValue; 1208 float depthBoundsTestMaxValue; 1209 }; 1210 1211