1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file state.h 24 * 25 * @brief Definitions for API state. 26 * 27 ******************************************************************************/ 28 #pragma once 29 30 #include "common/formats.h" 31 #include "common/simdintrin.h" 32 33 ////////////////////////////////////////////////////////////////////////// 34 /// PRIMITIVE_TOPOLOGY. 
//////////////////////////////////////////////////////////////////////////
// Primitive topology identifiers. Every enumerator carries an explicit
// value; note that 0x15 is intentionally left unassigned here (the list
// jumps from 0x14 to 0x16).
//
// Patch lists are numbered so that TOP_PATCHLIST_N == TOP_PATCHLIST_BASE + N,
// i.e. the control point count of a patch list is (topology - TOP_PATCHLIST_BASE).
enum PRIMITIVE_TOPOLOGY
{
    TOP_UNKNOWN                 = 0x0,
    TOP_POINT_LIST              = 0x1,
    TOP_LINE_LIST               = 0x2,
    TOP_LINE_STRIP              = 0x3,
    TOP_TRIANGLE_LIST           = 0x4,
    TOP_TRIANGLE_STRIP          = 0x5,
    TOP_TRIANGLE_FAN            = 0x6,
    TOP_QUAD_LIST               = 0x7,
    TOP_QUAD_STRIP              = 0x8,
    TOP_LINE_LIST_ADJ           = 0x9,
    TOP_LISTSTRIP_ADJ           = 0xA,  // NOTE(review): name looks like a typo for "LINESTRIP_ADJ"; kept for API compatibility
    TOP_TRI_LIST_ADJ            = 0xB,
    TOP_TRI_STRIP_ADJ           = 0xC,
    TOP_TRI_STRIP_REVERSE       = 0xD,
    TOP_POLYGON                 = 0xE,
    TOP_RECT_LIST               = 0xF,
    TOP_LINE_LOOP               = 0x10,
    TOP_POINT_LIST_BF           = 0x11,
    TOP_LINE_STRIP_CONT         = 0x12,
    TOP_LINE_STRIP_BF           = 0x13,
    TOP_LINE_STRIP_CONT_BF      = 0x14,
    TOP_TRIANGLE_FAN_NOSTIPPLE  = 0x16,
    TOP_TRIANGLE_DISC           = 0x17, /// @todo What is this??

    TOP_PATCHLIST_BASE          = 0x1F, // Invalid topology, used to calculate num verts for a patchlist.
    TOP_PATCHLIST_1             = 0x20, // List of 1-vertex patches
    TOP_PATCHLIST_2             = 0x21,
    TOP_PATCHLIST_3             = 0x22,
    TOP_PATCHLIST_4             = 0x23,
    TOP_PATCHLIST_5             = 0x24,
    TOP_PATCHLIST_6             = 0x25,
    TOP_PATCHLIST_7             = 0x26,
    TOP_PATCHLIST_8             = 0x27,
    TOP_PATCHLIST_9             = 0x28,
    TOP_PATCHLIST_10            = 0x29,
    TOP_PATCHLIST_11            = 0x2A,
    TOP_PATCHLIST_12            = 0x2B,
    TOP_PATCHLIST_13            = 0x2C,
    TOP_PATCHLIST_14            = 0x2D,
    TOP_PATCHLIST_15            = 0x2E,
    TOP_PATCHLIST_16            = 0x2F,
    TOP_PATCHLIST_17            = 0x30,
    TOP_PATCHLIST_18            = 0x31,
    TOP_PATCHLIST_19            = 0x32,
    TOP_PATCHLIST_20            = 0x33,
    TOP_PATCHLIST_21            = 0x34,
    TOP_PATCHLIST_22            = 0x35,
    TOP_PATCHLIST_23            = 0x36,
    TOP_PATCHLIST_24            = 0x37,
    TOP_PATCHLIST_25            = 0x38,
    TOP_PATCHLIST_26            = 0x39,
    TOP_PATCHLIST_27            = 0x3A,
    TOP_PATCHLIST_28            = 0x3B,
    TOP_PATCHLIST_29            = 0x3C,
    TOP_PATCHLIST_30            = 0x3D,
    TOP_PATCHLIST_31            = 0x3E,
    TOP_PATCHLIST_32            = 0x3F, // List of 32-vertex patches
};

//////////////////////////////////////////////////////////////////////////
/// SWR_SHADER_TYPE
//////////////////////////////////////////////////////////////////////////
100 enum SWR_SHADER_TYPE 101 { 102 SHADER_VERTEX, 103 SHADER_GEOMETRY, 104 SHADER_DOMAIN, 105 SHADER_HULL, 106 SHADER_PIXEL, 107 SHADER_COMPUTE, 108 109 NUM_SHADER_TYPES, 110 }; 111 112 ////////////////////////////////////////////////////////////////////////// 113 /// SWR_RENDERTARGET_ATTACHMENT 114 /// @todo Its not clear what an "attachment" means. Its not common term. 115 ////////////////////////////////////////////////////////////////////////// 116 enum SWR_RENDERTARGET_ATTACHMENT 117 { 118 SWR_ATTACHMENT_COLOR0, 119 SWR_ATTACHMENT_COLOR1, 120 SWR_ATTACHMENT_COLOR2, 121 SWR_ATTACHMENT_COLOR3, 122 SWR_ATTACHMENT_COLOR4, 123 SWR_ATTACHMENT_COLOR5, 124 SWR_ATTACHMENT_COLOR6, 125 SWR_ATTACHMENT_COLOR7, 126 SWR_ATTACHMENT_DEPTH, 127 SWR_ATTACHMENT_STENCIL, 128 129 SWR_NUM_ATTACHMENTS 130 }; 131 132 #define SWR_NUM_RENDERTARGETS 8 133 134 #define SWR_ATTACHMENT_COLOR0_BIT 0x001 135 #define SWR_ATTACHMENT_COLOR1_BIT 0x002 136 #define SWR_ATTACHMENT_COLOR2_BIT 0x004 137 #define SWR_ATTACHMENT_COLOR3_BIT 0x008 138 #define SWR_ATTACHMENT_COLOR4_BIT 0x010 139 #define SWR_ATTACHMENT_COLOR5_BIT 0x020 140 #define SWR_ATTACHMENT_COLOR6_BIT 0x040 141 #define SWR_ATTACHMENT_COLOR7_BIT 0x080 142 #define SWR_ATTACHMENT_DEPTH_BIT 0x100 143 #define SWR_ATTACHMENT_STENCIL_BIT 0x200 144 #define SWR_ATTACHMENT_MASK_ALL 0x3ff 145 #define SWR_ATTACHMENT_MASK_COLOR 0x0ff 146 147 148 ////////////////////////////////////////////////////////////////////////// 149 /// @brief SWR Inner Tessellation factor ID 150 /// See above GetTessFactorOutputPosition code for documentation 151 enum SWR_INNER_TESSFACTOR_ID 152 { 153 SWR_QUAD_U_TRI_INSIDE, 154 SWR_QUAD_V_INSIDE, 155 156 SWR_NUM_INNER_TESS_FACTORS, 157 }; 158 159 ////////////////////////////////////////////////////////////////////////// 160 /// @brief SWR Outer Tessellation factor ID 161 /// See above GetTessFactorOutputPosition code for documentation 162 enum SWR_OUTER_TESSFACTOR_ID 163 { 164 SWR_QUAD_U_EQ0_TRI_U_LINE_DETAIL, 165 
SWR_QUAD_V_EQ0_TRI_V_LINE_DENSITY, 166 SWR_QUAD_U_EQ1_TRI_W, 167 SWR_QUAD_V_EQ1, 168 169 SWR_NUM_OUTER_TESS_FACTORS, 170 }; 171 172 173 ///////////////////////////////////////////////////////////////////////// 174 /// simdvertex 175 /// @brief Defines a vertex element that holds all the data for SIMD vertices. 176 /// Contains position in clip space, hardcoded to attribute 0, 177 /// space for up to 32 attributes, as well as any SGV values generated 178 /// by the pipeline 179 ///////////////////////////////////////////////////////////////////////// 180 #define VERTEX_POSITION_SLOT 0 181 #define VERTEX_ATTRIB_START_SLOT 1 182 #define VERTEX_ATTRIB_END_SLOT 32 183 #define VERTEX_RTAI_SLOT 33 // GS writes RenderTargetArrayIndex here 184 #define VERTEX_PRIMID_SLOT 34 // GS writes PrimId here 185 #define VERTEX_CLIPCULL_DIST_LO_SLOT 35 // VS writes lower 4 clip/cull dist 186 #define VERTEX_CLIPCULL_DIST_HI_SLOT 36 // VS writes upper 4 clip/cull dist 187 #define VERTEX_POINT_SIZE_SLOT 37 // VS writes point size here 188 #define VERTEX_VIEWPORT_ARRAY_INDEX_SLOT 38 189 // SoAoSoA 190 struct simdvertex 191 { 192 simdvector attrib[KNOB_NUM_ATTRIBUTES]; 193 }; 194 195 ////////////////////////////////////////////////////////////////////////// 196 /// SWR_VS_CONTEXT 197 /// @brief Input to vertex shader 198 ///////////////////////////////////////////////////////////////////////// 199 struct SWR_VS_CONTEXT 200 { 201 simdvertex* pVin; // IN: SIMD input vertex data store 202 simdvertex* pVout; // OUT: SIMD output vertex data store 203 204 uint32_t InstanceID; // IN: Instance ID, constant across all verts of the SIMD 205 simdscalari VertexID; // IN: Vertex ID 206 simdscalari mask; // IN: Active mask for shader 207 }; 208 209 ///////////////////////////////////////////////////////////////////////// 210 /// ScalarCPoint 211 /// @brief defines a control point element as passed from the output 212 /// of the hull shader to the input of the domain shader 213 
///////////////////////////////////////////////////////////////////////// 214 struct ScalarAttrib 215 { 216 float x; 217 float y; 218 float z; 219 float w; 220 }; 221 222 struct ScalarCPoint 223 { 224 ScalarAttrib attrib[KNOB_NUM_ATTRIBUTES]; 225 }; 226 227 ////////////////////////////////////////////////////////////////////////// 228 /// SWR_TESSELLATION_FACTORS 229 /// @brief Tessellation factors structure (non-vector) 230 ///////////////////////////////////////////////////////////////////////// 231 struct SWR_TESSELLATION_FACTORS 232 { 233 float OuterTessFactors[SWR_NUM_OUTER_TESS_FACTORS]; 234 float InnerTessFactors[SWR_NUM_INNER_TESS_FACTORS]; 235 }; 236 237 #define MAX_NUM_VERTS_PER_PRIM 32 // support up to 32 control point patches 238 struct ScalarPatch 239 { 240 SWR_TESSELLATION_FACTORS tessFactors; 241 ScalarCPoint cp[MAX_NUM_VERTS_PER_PRIM]; 242 ScalarCPoint patchData; 243 }; 244 245 ////////////////////////////////////////////////////////////////////////// 246 /// SWR_HS_CONTEXT 247 /// @brief Input to hull shader 248 ///////////////////////////////////////////////////////////////////////// 249 struct SWR_HS_CONTEXT 250 { 251 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: (SIMD) input primitive data 252 simdscalari PrimitiveID; // IN: (SIMD) primitive ID generated from the draw call 253 simdscalari mask; // IN: Active mask for shader 254 ScalarPatch* pCPout; // OUT: Output control point patch 255 // SIMD-sized-array of SCALAR patches 256 }; 257 258 ////////////////////////////////////////////////////////////////////////// 259 /// SWR_DS_CONTEXT 260 /// @brief Input to domain shader 261 ///////////////////////////////////////////////////////////////////////// 262 struct SWR_DS_CONTEXT 263 { 264 uint32_t PrimitiveID; // IN: (SCALAR) PrimitiveID for the patch associated with the DS invocation 265 uint32_t vectorOffset; // IN: (SCALAR) vector index offset into SIMD data. 
266 uint32_t vectorStride; // IN: (SCALAR) stride (in vectors) of output data per attribute-component 267 ScalarPatch* pCpIn; // IN: (SCALAR) Control patch 268 simdscalar* pDomainU; // IN: (SIMD) Domain Point U coords 269 simdscalar* pDomainV; // IN: (SIMD) Domain Point V coords 270 simdscalari mask; // IN: Active mask for shader 271 simdscalar* pOutputData; // OUT: (SIMD) Vertex Attributes (2D array of vectors, one row per attribute-component) 272 }; 273 274 ////////////////////////////////////////////////////////////////////////// 275 /// SWR_GS_CONTEXT 276 /// @brief Input to geometry shader. 277 ///////////////////////////////////////////////////////////////////////// 278 struct SWR_GS_CONTEXT 279 { 280 simdvertex vert[MAX_NUM_VERTS_PER_PRIM]; // IN: input primitive data for SIMD prims 281 simdscalari PrimitiveID; // IN: input primitive ID generated from the draw call 282 uint32_t InstanceID; // IN: input instance ID 283 simdscalari mask; // IN: Active mask for shader 284 uint8_t* pStream; // OUT: output stream (contains vertices for all output streams) 285 uint8_t* pCutOrStreamIdBuffer; // OUT: cut or stream id buffer 286 simdscalari vertexCount; // OUT: num vertices emitted per SIMD lane 287 }; 288 289 struct PixelPositions 290 { 291 simdscalar UL; 292 simdscalar center; 293 simdscalar sample; 294 simdscalar centroid; 295 }; 296 297 #define SWR_MAX_NUM_MULTISAMPLES 16 298 299 ////////////////////////////////////////////////////////////////////////// 300 /// SWR_PS_CONTEXT 301 /// @brief Input to pixel shader. 
302 ///////////////////////////////////////////////////////////////////////// 303 struct SWR_PS_CONTEXT 304 { 305 PixelPositions vX; // IN: x location(s) of pixels 306 PixelPositions vY; // IN: x location(s) of pixels 307 simdscalar vZ; // INOUT: z location of pixels 308 simdscalari activeMask; // OUT: mask for kill 309 simdscalar inputMask; // IN: input coverage mask for all samples 310 simdscalari oMask; // OUT: mask for output coverage 311 312 PixelPositions vI; // barycentric coords evaluated at pixel center, sample position, centroid 313 PixelPositions vJ; 314 PixelPositions vOneOverW; // IN: 1/w 315 316 const float* pAttribs; // IN: pointer to attribute barycentric coefficients 317 const float* pPerspAttribs; // IN: pointer to attribute/w barycentric coefficients 318 const float* pRecipW; // IN: pointer to 1/w coord for each vertex 319 const float *I; // IN: Barycentric A, B, and C coefs used to compute I 320 const float *J; // IN: Barycentric A, B, and C coefs used to compute J 321 float recipDet; // IN: 1/Det, used when barycentric interpolating attributes 322 const float* pSamplePosX; // IN: array of sample positions 323 const float* pSamplePosY; // IN: array of sample positions 324 simdvector shaded[SWR_NUM_RENDERTARGETS]; 325 // OUT: result color per rendertarget 326 327 uint32_t frontFace; // IN: front- 1, back- 0 328 uint32_t primID; // IN: primitive ID 329 uint32_t sampleIndex; // IN: sampleIndex 330 331 uint32_t rasterizerSampleCount; // IN: sample count used by the rasterizer 332 333 }; 334 335 ////////////////////////////////////////////////////////////////////////// 336 /// SWR_CS_CONTEXT 337 /// @brief Input to compute shader. 338 ///////////////////////////////////////////////////////////////////////// 339 struct SWR_CS_CONTEXT 340 { 341 // The ThreadGroupId is the current thread group index relative 342 // to all thread groups in the Dispatch call. 
The ThreadId, ThreadIdInGroup, 343 // and ThreadIdInGroupFlattened can be derived from ThreadGroupId in the shader. 344 345 // Compute shader accepts the following system values. 346 // o ThreadId - Current thread id relative to all other threads in dispatch. 347 // o ThreadGroupId - Current thread group id relative to all other groups in dispatch. 348 // o ThreadIdInGroup - Current thread relative to all threads in the current thread group. 349 // o ThreadIdInGroupFlattened - Flattened linear id derived from ThreadIdInGroup. 350 // 351 // All of these system values can be computed in the shader. They will be 352 // derived from the current tile counter. The tile counter is an atomic counter that 353 // resides in the draw context and is initialized to the product of the dispatch dims. 354 // 355 // tileCounter = dispatchDims.x * dispatchDims.y * dispatchDims.z 356 // 357 // Each CPU worker thread will atomically decrement this counter and passes the current 358 // count into the shader. When the count reaches 0 then all thread groups in the 359 // dispatch call have been completed. 360 361 uint32_t tileCounter; // The tile counter value for this thread group. 362 363 // Dispatch dimensions used by shader to compute system values from the tile counter. 364 uint32_t dispatchDims[3]; 365 366 uint8_t* pTGSM; // Thread Group Shared Memory pointer. 
367 368 uint8_t* pSpillFillBuffer; // Spill/fill buffer for barrier support 369 }; 370 371 // enums 372 enum SWR_TILE_MODE 373 { 374 SWR_TILE_NONE = 0x0, // Linear mode (no tiling) 375 SWR_TILE_MODE_WMAJOR, // W major tiling 376 SWR_TILE_MODE_XMAJOR, // X major tiling 377 SWR_TILE_MODE_YMAJOR, // Y major tiling 378 SWR_TILE_SWRZ, // SWR-Z tiling 379 380 SWR_TILE_MODE_COUNT 381 }; 382 383 enum SWR_SURFACE_TYPE 384 { 385 SURFACE_1D = 0, 386 SURFACE_2D = 1, 387 SURFACE_3D = 2, 388 SURFACE_CUBE = 3, 389 SURFACE_BUFFER = 4, 390 SURFACE_STRUCTURED_BUFFER = 5, 391 SURFACE_NULL = 7 392 }; 393 394 enum SWR_ZFUNCTION 395 { 396 ZFUNC_ALWAYS, 397 ZFUNC_NEVER, 398 ZFUNC_LT, 399 ZFUNC_EQ, 400 ZFUNC_LE, 401 ZFUNC_GT, 402 ZFUNC_NE, 403 ZFUNC_GE, 404 NUM_ZFUNC 405 }; 406 407 enum SWR_STENCILOP 408 { 409 STENCILOP_KEEP, 410 STENCILOP_ZERO, 411 STENCILOP_REPLACE, 412 STENCILOP_INCRSAT, 413 STENCILOP_DECRSAT, 414 STENCILOP_INCR, 415 STENCILOP_DECR, 416 STENCILOP_INVERT 417 }; 418 419 enum SWR_BLEND_FACTOR 420 { 421 BLENDFACTOR_ONE, 422 BLENDFACTOR_SRC_COLOR, 423 BLENDFACTOR_SRC_ALPHA, 424 BLENDFACTOR_DST_ALPHA, 425 BLENDFACTOR_DST_COLOR, 426 BLENDFACTOR_SRC_ALPHA_SATURATE, 427 BLENDFACTOR_CONST_COLOR, 428 BLENDFACTOR_CONST_ALPHA, 429 BLENDFACTOR_SRC1_COLOR, 430 BLENDFACTOR_SRC1_ALPHA, 431 BLENDFACTOR_ZERO, 432 BLENDFACTOR_INV_SRC_COLOR, 433 BLENDFACTOR_INV_SRC_ALPHA, 434 BLENDFACTOR_INV_DST_ALPHA, 435 BLENDFACTOR_INV_DST_COLOR, 436 BLENDFACTOR_INV_CONST_COLOR, 437 BLENDFACTOR_INV_CONST_ALPHA, 438 BLENDFACTOR_INV_SRC1_COLOR, 439 BLENDFACTOR_INV_SRC1_ALPHA 440 }; 441 442 enum SWR_BLEND_OP 443 { 444 BLENDOP_ADD, 445 BLENDOP_SUBTRACT, 446 BLENDOP_REVSUBTRACT, 447 BLENDOP_MIN, 448 BLENDOP_MAX, 449 }; 450 451 enum SWR_LOGIC_OP 452 { 453 LOGICOP_CLEAR, 454 LOGICOP_NOR, 455 LOGICOP_AND_INVERTED, 456 LOGICOP_COPY_INVERTED, 457 LOGICOP_AND_REVERSE, 458 LOGICOP_INVERT, 459 LOGICOP_XOR, 460 LOGICOP_NAND, 461 LOGICOP_AND, 462 LOGICOP_EQUIV, 463 LOGICOP_NOOP, 464 LOGICOP_OR_INVERTED, 465 
LOGICOP_COPY, 466 LOGICOP_OR_REVERSE, 467 LOGICOP_OR, 468 LOGICOP_SET, 469 }; 470 471 ////////////////////////////////////////////////////////////////////////// 472 /// SWR_AUX_MODE 473 /// @brief Specifies how the auxiliary buffer is used by the driver. 474 ////////////////////////////////////////////////////////////////////////// 475 enum SWR_AUX_MODE 476 { 477 AUX_MODE_NONE, 478 AUX_MODE_COLOR, 479 AUX_MODE_UAV, 480 AUX_MODE_DEPTH, 481 }; 482 483 ////////////////////////////////////////////////////////////////////////// 484 /// SWR_SURFACE_STATE 485 ////////////////////////////////////////////////////////////////////////// 486 struct SWR_SURFACE_STATE 487 { 488 uint8_t *pBaseAddress; 489 SWR_SURFACE_TYPE type; // @llvm_enum 490 SWR_FORMAT format; // @llvm_enum 491 uint32_t width; 492 uint32_t height; 493 uint32_t depth; 494 uint32_t numSamples; 495 uint32_t samplePattern; 496 uint32_t pitch; 497 uint32_t qpitch; 498 uint32_t minLod; // for sampled surfaces, the most detailed LOD that can be accessed by sampler 499 uint32_t maxLod; // for sampled surfaces, the max LOD that can be accessed 500 float resourceMinLod; // for sampled surfaces, the most detailed fractional mip that can be accessed by sampler 501 uint32_t lod; // for render targets, the lod being rendered to 502 uint32_t arrayIndex; // for render targets, the array index being rendered to for arrayed surfaces 503 SWR_TILE_MODE tileMode; // @llvm_enum 504 uint32_t halign; 505 uint32_t valign; 506 uint32_t xOffset; 507 uint32_t yOffset; 508 509 uint32_t lodOffsets[2][15]; // lod offsets for sampled surfaces 510 511 uint8_t *pAuxBaseAddress; // Used for compression, append/consume counter, etc. 
512 SWR_AUX_MODE auxMode; // @llvm_enum 513 514 bool bInterleavedSamples; // are MSAA samples stored interleaved or planar 515 }; 516 517 // vertex fetch state 518 // WARNING- any changes to this struct need to be reflected 519 // in the fetch shader jit 520 struct SWR_VERTEX_BUFFER_STATE 521 { 522 uint32_t index; 523 uint32_t pitch; 524 const uint8_t *pData; 525 uint32_t size; 526 uint32_t numaNode; 527 uint32_t maxVertex; // size / pitch. precalculated value used by fetch shader for OOB checks 528 uint32_t partialInboundsSize; // size % pitch. precalculated value used by fetch shader for partially OOB vertices 529 }; 530 531 struct SWR_INDEX_BUFFER_STATE 532 { 533 // Format type for indices (e.g. UINT16, UINT32, etc.) 534 SWR_FORMAT format; // @llvm_enum 535 const void *pIndices; 536 uint32_t size; 537 }; 538 539 540 ////////////////////////////////////////////////////////////////////////// 541 /// SWR_FETCH_CONTEXT 542 /// @brief Input to fetch shader. 543 /// @note WARNING - Changes to this struct need to be reflected in the 544 /// fetch shader jit. 545 ///////////////////////////////////////////////////////////////////////// 546 struct SWR_FETCH_CONTEXT 547 { 548 const SWR_VERTEX_BUFFER_STATE* pStreams; // IN: array of bound vertex buffers 549 const int32_t* pIndices; // IN: pointer to index buffer for indexed draws 550 const int32_t* pLastIndex; // IN: pointer to end of index buffer, used for bounds checking 551 uint32_t CurInstance; // IN: current instance 552 uint32_t BaseVertex; // IN: base vertex 553 uint32_t StartVertex; // IN: start vertex 554 uint32_t StartInstance; // IN: start instance 555 simdscalari VertexID; // OUT: vector of vertex IDs 556 simdscalari CutMask; // OUT: vector mask of indices which have the cut index value 557 }; 558 559 ////////////////////////////////////////////////////////////////////////// 560 /// SWR_STATS 561 /// 562 /// @brief All statistics generated by SWR go here. These are public 563 /// to driver. 
564 ///////////////////////////////////////////////////////////////////////// 565 OSALIGNLINE(struct) SWR_STATS 566 { 567 // Occlusion Query 568 uint64_t DepthPassCount; // Number of passing depth tests. Not exact. 569 570 // Pipeline Stats 571 uint64_t PsInvocations; // Number of Pixel Shader invocations 572 uint64_t CsInvocations; // Number of Compute Shader invocations 573 574 }; 575 576 ////////////////////////////////////////////////////////////////////////// 577 /// SWR_STATS 578 /// 579 /// @brief All statistics generated by FE. 580 ///////////////////////////////////////////////////////////////////////// 581 OSALIGNLINE(struct) SWR_STATS_FE 582 { 583 uint64_t IaVertices; // Number of Fetch Shader vertices 584 uint64_t IaPrimitives; // Number of PA primitives. 585 uint64_t VsInvocations; // Number of Vertex Shader invocations 586 uint64_t HsInvocations; // Number of Hull Shader invocations 587 uint64_t DsInvocations; // Number of Domain Shader invocations 588 uint64_t GsInvocations; // Number of Geometry Shader invocations 589 uint64_t GsPrimitives; // Number of prims GS outputs. 590 uint64_t CInvocations; // Number of clipper invocations 591 uint64_t CPrimitives; // Number of clipper primitives. 592 593 // Streamout Stats 594 uint64_t SoPrimStorageNeeded[4]; 595 uint64_t SoNumPrimsWritten[4]; 596 }; 597 598 ////////////////////////////////////////////////////////////////////////// 599 /// STREAMOUT_BUFFERS 600 ///////////////////////////////////////////////////////////////////////// 601 602 #define MAX_SO_STREAMS 4 603 #define MAX_SO_BUFFERS 4 604 #define MAX_ATTRIBUTES 32 605 606 struct SWR_STREAMOUT_BUFFER 607 { 608 bool enable; 609 bool soWriteEnable; 610 611 // Pointers to streamout buffers. 612 uint32_t* pBuffer; 613 614 // Size of buffer in dwords. 615 uint32_t bufferSize; 616 617 // Vertex pitch of buffer in dwords. 618 uint32_t pitch; 619 620 // Offset into buffer in dwords. SOS will increment this offset. 
621 uint32_t streamOffset; 622 623 // Offset to the SO write offset. If not null then we update offset here. 624 uint32_t* pWriteOffset; 625 626 }; 627 628 ////////////////////////////////////////////////////////////////////////// 629 /// STREAMOUT_STATE 630 ///////////////////////////////////////////////////////////////////////// 631 struct SWR_STREAMOUT_STATE 632 { 633 // This disables stream output. 634 bool soEnable; 635 636 // which streams are enabled for streamout 637 bool streamEnable[MAX_SO_STREAMS]; 638 639 // If set then do not send any streams to the rasterizer. 640 bool rasterizerDisable; 641 642 // Specifies which stream to send to the rasterizer. 643 uint32_t streamToRasterizer; 644 645 // The stream masks specify which attributes are sent to which streams. 646 // These masks help the FE to setup the pPrimData buffer that is passed 647 // the Stream Output Shader (SOS) function. 648 uint32_t streamMasks[MAX_SO_STREAMS]; 649 650 // Number of attributes, including position, per vertex that are streamed out. 651 // This should match number of bits in stream mask. 652 uint32_t streamNumEntries[MAX_SO_STREAMS]; 653 }; 654 655 ////////////////////////////////////////////////////////////////////////// 656 /// STREAMOUT_CONTEXT - Passed to SOS 657 ///////////////////////////////////////////////////////////////////////// 658 struct SWR_STREAMOUT_CONTEXT 659 { 660 uint32_t* pPrimData; 661 SWR_STREAMOUT_BUFFER* pBuffer[MAX_SO_STREAMS]; 662 663 // Num prims written for this stream 664 uint32_t numPrimsWritten; 665 666 // Num prims that should have been written if there were no overflow. 667 uint32_t numPrimStorageNeeded; 668 }; 669 670 ////////////////////////////////////////////////////////////////////////// 671 /// SWR_GS_STATE - Geometry shader state 672 ///////////////////////////////////////////////////////////////////////// 673 struct SWR_GS_STATE 674 { 675 bool gsEnable; 676 677 // number of input attributes per vertex. 
used by the frontend to 678 // optimize assembling primitives for GS 679 uint32_t numInputAttribs; 680 681 // output topology - can be point, tristrip, or linestrip 682 PRIMITIVE_TOPOLOGY outputTopology; // @llvm_enum 683 684 // maximum number of verts that can be emitted by a single instance of the GS 685 uint32_t maxNumVerts; 686 687 // instance count 688 uint32_t instanceCount; 689 690 // geometry shader emits renderTargetArrayIndex 691 bool emitsRenderTargetArrayIndex; 692 693 // geometry shader emits PrimitiveID 694 bool emitsPrimitiveID; 695 696 // geometry shader emits ViewportArrayIndex 697 bool emitsViewportArrayIndex; 698 699 // if true, geometry shader emits a single stream, with separate cut buffer. 700 // if false, geometry shader emits vertices for multiple streams to the stream buffer, with a separate StreamID buffer 701 // to map vertices to streams 702 bool isSingleStream; 703 704 // when single stream is enabled, singleStreamID dictates which stream is being output. 705 // field ignored if isSingleStream is false 706 uint32_t singleStreamID; 707 }; 708 709 710 ////////////////////////////////////////////////////////////////////////// 711 /// SWR_TS_OUTPUT_TOPOLOGY - Defines data output by the tessellator / DS 712 ///////////////////////////////////////////////////////////////////////// 713 enum SWR_TS_OUTPUT_TOPOLOGY 714 { 715 SWR_TS_OUTPUT_POINT, 716 SWR_TS_OUTPUT_LINE, 717 SWR_TS_OUTPUT_TRI_CW, 718 SWR_TS_OUTPUT_TRI_CCW, 719 720 SWR_TS_OUTPUT_TOPOLOGY_COUNT 721 }; 722 723 ////////////////////////////////////////////////////////////////////////// 724 /// SWR_TS_PARTITIONING - Defines tessellation algorithm 725 ///////////////////////////////////////////////////////////////////////// 726 enum SWR_TS_PARTITIONING 727 { 728 SWR_TS_INTEGER, 729 SWR_TS_ODD_FRACTIONAL, 730 SWR_TS_EVEN_FRACTIONAL, 731 732 SWR_TS_PARTITIONING_COUNT 733 }; 734 735 ////////////////////////////////////////////////////////////////////////// 736 /// SWR_TS_DOMAIN - Defines 
Tessellation Domain 737 ///////////////////////////////////////////////////////////////////////// 738 enum SWR_TS_DOMAIN 739 { 740 SWR_TS_QUAD, 741 SWR_TS_TRI, 742 SWR_TS_ISOLINE, 743 744 SWR_TS_DOMAIN_COUNT 745 }; 746 747 ////////////////////////////////////////////////////////////////////////// 748 /// SWR_TS_STATE - Tessellation state 749 ///////////////////////////////////////////////////////////////////////// 750 struct SWR_TS_STATE 751 { 752 bool tsEnable; 753 SWR_TS_OUTPUT_TOPOLOGY tsOutputTopology; // @llvm_enum 754 SWR_TS_PARTITIONING partitioning; // @llvm_enum 755 SWR_TS_DOMAIN domain; // @llvm_enum 756 757 PRIMITIVE_TOPOLOGY postDSTopology; // @llvm_enum 758 759 uint32_t numHsInputAttribs; 760 uint32_t numHsOutputAttribs; 761 uint32_t numDsOutputAttribs; 762 }; 763 764 // output merger state 765 struct SWR_RENDER_TARGET_BLEND_STATE 766 { 767 uint8_t writeDisableRed : 1; 768 uint8_t writeDisableGreen : 1; 769 uint8_t writeDisableBlue : 1; 770 uint8_t writeDisableAlpha : 1; 771 }; 772 static_assert(sizeof(SWR_RENDER_TARGET_BLEND_STATE) == 1, "Invalid SWR_RENDER_TARGET_BLEND_STATE size"); 773 774 enum SWR_MULTISAMPLE_COUNT 775 { 776 SWR_MULTISAMPLE_1X = 0, 777 SWR_MULTISAMPLE_2X, 778 SWR_MULTISAMPLE_4X, 779 SWR_MULTISAMPLE_8X, 780 SWR_MULTISAMPLE_16X, 781 SWR_MULTISAMPLE_TYPE_COUNT 782 }; 783 784 struct SWR_BLEND_STATE 785 { 786 // constant blend factor color in RGBA float 787 float constantColor[4]; 788 789 // alpha test reference value in unorm8 or float32 790 uint32_t alphaTestReference; 791 uint32_t sampleMask; 792 // all RT's have the same sample count 793 ///@todo move this to Output Merger state when we refactor 794 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum 795 796 SWR_RENDER_TARGET_BLEND_STATE renderTarget[SWR_NUM_RENDERTARGETS]; 797 }; 798 static_assert(sizeof(SWR_BLEND_STATE) == 36, "Invalid SWR_BLEND_STATE size"); 799 800 ////////////////////////////////////////////////////////////////////////// 801 /// FUNCTION POINTERS FOR SHADERS 802 
803 typedef void(__cdecl *PFN_FETCH_FUNC)(SWR_FETCH_CONTEXT& fetchInfo, simdvertex& out); 804 typedef void(__cdecl *PFN_VERTEX_FUNC)(HANDLE hPrivateData, SWR_VS_CONTEXT* pVsContext); 805 typedef void(__cdecl *PFN_HS_FUNC)(HANDLE hPrivateData, SWR_HS_CONTEXT* pHsContext); 806 typedef void(__cdecl *PFN_DS_FUNC)(HANDLE hPrivateData, SWR_DS_CONTEXT* pDsContext); 807 typedef void(__cdecl *PFN_GS_FUNC)(HANDLE hPrivateData, SWR_GS_CONTEXT* pGsContext); 808 typedef void(__cdecl *PFN_CS_FUNC)(HANDLE hPrivateData, SWR_CS_CONTEXT* pCsContext); 809 typedef void(__cdecl *PFN_SO_FUNC)(SWR_STREAMOUT_CONTEXT& soContext); 810 typedef void(__cdecl *PFN_PIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); 811 typedef void(__cdecl *PFN_CPIXEL_KERNEL)(HANDLE hPrivateData, SWR_PS_CONTEXT *pContext); 812 typedef void(__cdecl *PFN_BLEND_JIT_FUNC)(const SWR_BLEND_STATE*, 813 simdvector& vSrc, simdvector& vSrc1, simdscalar& vSrc0Alpha, uint32_t sample, 814 uint8_t* pDst, simdvector& vResult, simdscalari* vOMask, simdscalari* vCoverageMask); 815 typedef simdscalar(*PFN_QUANTIZE_DEPTH)(simdscalar); 816 817 818 819 ////////////////////////////////////////////////////////////////////////// 820 /// FRONTEND_STATE 821 ///////////////////////////////////////////////////////////////////////// 822 struct SWR_FRONTEND_STATE 823 { 824 // skip clip test, perspective divide, and viewport transform 825 // intended for verts in screen space 826 bool vpTransformDisable; 827 bool bEnableCutIndex; 828 union 829 { 830 struct 831 { 832 uint32_t triFan : 2; 833 uint32_t lineStripList : 1; 834 uint32_t triStripList : 2; 835 }; 836 uint32_t bits; 837 } provokingVertex; 838 uint32_t topologyProvokingVertex; // provoking vertex for the draw topology 839 }; 840 841 ////////////////////////////////////////////////////////////////////////// 842 /// VIEWPORT_MATRIX 843 ///////////////////////////////////////////////////////////////////////// 844 struct SWR_VIEWPORT_MATRIX 845 { 846 float m00; 847 float m11; 
848 float m22; 849 float m30; 850 float m31; 851 float m32; 852 }; 853 854 ////////////////////////////////////////////////////////////////////////// 855 /// VIEWPORT_MATRIXES 856 ///////////////////////////////////////////////////////////////////////// 857 struct SWR_VIEWPORT_MATRICES 858 { 859 float m00[KNOB_NUM_VIEWPORTS_SCISSORS]; 860 float m11[KNOB_NUM_VIEWPORTS_SCISSORS]; 861 float m22[KNOB_NUM_VIEWPORTS_SCISSORS]; 862 float m30[KNOB_NUM_VIEWPORTS_SCISSORS]; 863 float m31[KNOB_NUM_VIEWPORTS_SCISSORS]; 864 float m32[KNOB_NUM_VIEWPORTS_SCISSORS]; 865 }; 866 867 ////////////////////////////////////////////////////////////////////////// 868 /// SWR_VIEWPORT 869 ///////////////////////////////////////////////////////////////////////// 870 struct SWR_VIEWPORT 871 { 872 float x; 873 float y; 874 float width; 875 float height; 876 float minZ; 877 float maxZ; 878 }; 879 880 ////////////////////////////////////////////////////////////////////////// 881 /// SWR_CULLMODE 882 ////////////////////////////////////////////////////////////////////////// 883 enum SWR_CULLMODE 884 { 885 SWR_CULLMODE_BOTH, 886 SWR_CULLMODE_NONE, 887 SWR_CULLMODE_FRONT, 888 SWR_CULLMODE_BACK 889 }; 890 891 enum SWR_FILLMODE 892 { 893 SWR_FILLMODE_POINT, 894 SWR_FILLMODE_WIREFRAME, 895 SWR_FILLMODE_SOLID 896 }; 897 898 enum SWR_FRONTWINDING 899 { 900 SWR_FRONTWINDING_CW, 901 SWR_FRONTWINDING_CCW 902 }; 903 904 905 enum SWR_MSAA_SAMPLE_PATTERN 906 { 907 SWR_MSAA_CENTER_PATTERN, 908 SWR_MSAA_STANDARD_PATTERN, 909 SWR_MSAA_SAMPLE_PATTERN_COUNT 910 }; 911 912 enum SWR_PIXEL_LOCATION 913 { 914 SWR_PIXEL_LOCATION_CENTER, 915 SWR_PIXEL_LOCATION_UL, 916 }; 917 918 // fixed point screen space sample locations within a pixel 919 struct SWR_MULTISAMPLE_POS 920 { 921 uint32_t x; 922 uint32_t y; 923 }; 924 925 enum SWR_MSAA_RASTMODE 926 { 927 SWR_MSAA_RASTMODE_OFF_PIXEL, 928 SWR_MSAA_RASTMODE_OFF_PATTERN, 929 SWR_MSAA_RASTMODE_ON_PIXEL, 930 SWR_MSAA_RASTMODE_ON_PATTERN 931 }; 932 933 
////////////////////////////////////////////////////////////////////////// 934 /// SWR_RASTSTATE 935 ////////////////////////////////////////////////////////////////////////// 936 struct SWR_RASTSTATE 937 { 938 uint32_t cullMode : 2; 939 uint32_t fillMode : 2; 940 uint32_t frontWinding : 1; 941 uint32_t scissorEnable : 1; 942 uint32_t depthClipEnable : 1; 943 uint32_t clipHalfZ : 1; 944 uint32_t pointParam : 1; 945 uint32_t pointSpriteEnable : 1; 946 uint32_t pointSpriteTopOrigin : 1; 947 uint32_t msaaRastEnable : 1; 948 uint32_t forcedSampleCount : 1; 949 uint32_t pixelOffset : 1; 950 uint32_t depthBiasPreAdjusted : 1; ///< depth bias constant is in float units, not per-format Z units 951 uint32_t conservativeRast : 1; 952 953 float pointSize; 954 float lineWidth; 955 956 float depthBias; 957 float slopeScaledDepthBias; 958 float depthBiasClamp; 959 SWR_FORMAT depthFormat; // @llvm_enum 960 961 ///@todo: MSAA lines 962 // multisample state for MSAA lines 963 SWR_MSAA_RASTMODE rastMode; // @llvm_enum 964 965 // sample count the rasterizer is running at 966 SWR_MULTISAMPLE_COUNT sampleCount; // @llvm_enum 967 uint32_t pixelLocation; // UL or Center 968 SWR_MULTISAMPLE_POS iSamplePos[SWR_MAX_NUM_MULTISAMPLES]; 969 SWR_MSAA_SAMPLE_PATTERN samplePattern; // @llvm_enum 970 971 // user clip/cull distance enables 972 uint8_t cullDistanceMask; 973 uint8_t clipDistanceMask; 974 }; 975 976 enum SWR_CONSTANT_SOURCE 977 { 978 SWR_CONSTANT_SOURCE_CONST_0000, 979 SWR_CONSTANT_SOURCE_CONST_0001_FLOAT, 980 SWR_CONSTANT_SOURCE_CONST_1111_FLOAT, 981 SWR_CONSTANT_SOURCE_PRIM_ID 982 }; 983 984 struct SWR_ATTRIB_SWIZZLE 985 { 986 uint16_t sourceAttrib : 5; // source attribute 987 uint16_t constantSource : 2; // constant source to apply 988 uint16_t componentOverrideMask : 4; // override component with constant source 989 }; 990 991 // backend state 992 struct SWR_BACKEND_STATE 993 { 994 uint32_t constantInterpolationMask; // bitmask indicating which attributes have constant 
interpolation 995 uint32_t pointSpriteTexCoordMask; // bitmask indicating the attribute(s) which should be interpreted as tex coordinates 996 997 uint8_t numAttributes; // total number of attributes to send to backend (up to 32) 998 uint8_t numComponents[32]; // number of components to setup per attribute, this reduces some calculations for unneeded components 999 1000 bool swizzleEnable; // when enabled, core will parse the swizzle map when 1001 // setting up attributes for the backend, otherwise 1002 // all attributes up to numAttributes will be sent 1003 SWR_ATTRIB_SWIZZLE swizzleMap[32]; 1004 }; 1005 1006 1007 union SWR_DEPTH_STENCIL_STATE 1008 { 1009 struct 1010 { 1011 // dword 0 1012 uint32_t depthWriteEnable : 1; 1013 uint32_t depthTestEnable : 1; 1014 uint32_t stencilWriteEnable : 1; 1015 uint32_t stencilTestEnable : 1; 1016 uint32_t doubleSidedStencilTestEnable : 1; 1017 1018 uint32_t depthTestFunc : 3; 1019 uint32_t stencilTestFunc : 3; 1020 1021 uint32_t backfaceStencilPassDepthPassOp : 3; 1022 uint32_t backfaceStencilPassDepthFailOp : 3; 1023 uint32_t backfaceStencilFailOp : 3; 1024 uint32_t backfaceStencilTestFunc : 3; 1025 uint32_t stencilPassDepthPassOp : 3; 1026 uint32_t stencilPassDepthFailOp : 3; 1027 uint32_t stencilFailOp : 3; 1028 1029 // dword 1 1030 uint8_t backfaceStencilWriteMask; 1031 uint8_t backfaceStencilTestMask; 1032 uint8_t stencilWriteMask; 1033 uint8_t stencilTestMask; 1034 1035 // dword 2 1036 uint8_t backfaceStencilRefValue; 1037 uint8_t stencilRefValue; 1038 }; 1039 uint32_t value[3]; 1040 }; 1041 1042 enum SWR_SHADING_RATE 1043 { 1044 SWR_SHADING_RATE_PIXEL, 1045 SWR_SHADING_RATE_SAMPLE, 1046 SWR_SHADING_RATE_COUNT, 1047 }; 1048 1049 enum SWR_INPUT_COVERAGE 1050 { 1051 SWR_INPUT_COVERAGE_NONE, 1052 SWR_INPUT_COVERAGE_NORMAL, 1053 SWR_INPUT_COVERAGE_INNER_CONSERVATIVE, 1054 SWR_INPUT_COVERAGE_COUNT, 1055 }; 1056 1057 enum SWR_PS_POSITION_OFFSET 1058 { 1059 SWR_PS_POSITION_SAMPLE_NONE, 1060 SWR_PS_POSITION_SAMPLE_OFFSET, 1061 
SWR_PS_POSITION_CENTROID_OFFSET, 1062 SWR_PS_POSITION_OFFSET_COUNT, 1063 }; 1064 1065 enum SWR_BARYCENTRICS_MASK 1066 { 1067 SWR_BARYCENTRIC_PER_PIXEL_MASK = 0x1, 1068 SWR_BARYCENTRIC_CENTROID_MASK = 0x2, 1069 SWR_BARYCENTRIC_PER_SAMPLE_MASK = 0x4, 1070 }; 1071 1072 // pixel shader state 1073 struct SWR_PS_STATE 1074 { 1075 // dword 0-1 1076 PFN_PIXEL_KERNEL pfnPixelShader; // @llvm_pfn 1077 1078 // dword 2 1079 uint32_t killsPixel : 1; // pixel shader can kill pixels 1080 uint32_t inputCoverage : 2; // ps uses input coverage 1081 uint32_t writesODepth : 1; // pixel shader writes to depth 1082 uint32_t usesSourceDepth : 1; // pixel shader reads depth 1083 uint32_t shadingRate : 2; // shading per pixel / sample / coarse pixel 1084 uint32_t numRenderTargets : 4; // number of render target outputs in use (0-8) 1085 uint32_t posOffset : 2; // type of offset (none, sample, centroid) to add to pixel position 1086 uint32_t barycentricsMask : 3; // which type(s) of barycentric coords does the PS interpolate attributes with 1087 uint32_t usesUAV : 1; // pixel shader accesses UAV 1088 uint32_t forceEarlyZ : 1; // force execution of early depth/stencil test 1089 1090 }; 1091 1092 // depth bounds state 1093 struct SWR_DEPTH_BOUNDS_STATE 1094 { 1095 bool depthBoundsTestEnable; 1096 float depthBoundsTestMinValue; 1097 float depthBoundsTestMaxValue; 1098 }; 1099 1100