/****************************************************************************
* Copyright (C) 2014-2016 Intel Corporation. All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file api.cpp
*
* @brief API implementation
*
******************************************************************************/

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <new>

#include "core/api.h"
#include "core/backend.h"
#include "core/context.h"
#include "core/depthstencil.h"
#include "core/frontend.h"
#include "core/rasterizer.h"
#include "core/rdtsc_core.h"
#include "core/threads.h"
#include "core/tilemgr.h"
#include "core/clip.h"
#include "core/utils.h"

#include "common/os.h"

/// Largest scissor rectangle the rasterizer accepts; SetupMacroTileScissors
/// intersects every scissor with this rect before converting to fixed point.
static const SWR_RECT g_MaxScissorRect = { 0, 0, KNOB_MAX_SCISSOR_X, KNOB_MAX_SCISSOR_Y };

// Defined further down in this file; SwrCreateContext calls it before the definition is seen.
void SetupDefaultState(SWR_CONTEXT *pContext);

//////////////////////////////////////////////////////////////////////////
/// @brief Convert the opaque API handle back into the context pointer.
///        This is a plain cast; the handle is not validated.
/// @param hContext - handle previously returned by SwrCreateContext.
static INLINE SWR_CONTEXT* GetContext(HANDLE hContext)
{
    return (SWR_CONTEXT*)hContext;
}

//////////////////////////////////////////////////////////////////////////
/// @brief Notify every thread waiting on the context's FifosNotEmpty
///        condition variable.
/// @param pContext - context whose waiters are notified.
void WakeAllThreads(SWR_CONTEXT *pContext)
{
    pContext->FifosNotEmpty.notify_all();
}

//////////////////////////////////////////////////////////////////////////
/// @brief Create SWR Context.
/// @param pCreateInfo - pointer to creation info.
65 HANDLE SwrCreateContext( 66 SWR_CREATECONTEXT_INFO* pCreateInfo) 67 { 68 RDTSC_RESET(); 69 RDTSC_INIT(0); 70 71 void* pContextMem = AlignedMalloc(sizeof(SWR_CONTEXT), KNOB_SIMD_WIDTH * 4); 72 memset(pContextMem, 0, sizeof(SWR_CONTEXT)); 73 SWR_CONTEXT *pContext = new (pContextMem) SWR_CONTEXT(); 74 75 pContext->privateStateSize = pCreateInfo->privateStateSize; 76 77 pContext->MAX_DRAWS_IN_FLIGHT = KNOB_MAX_DRAWS_IN_FLIGHT; 78 if (pCreateInfo->MAX_DRAWS_IN_FLIGHT != 0) 79 { 80 pContext->MAX_DRAWS_IN_FLIGHT = pCreateInfo->MAX_DRAWS_IN_FLIGHT; 81 } 82 83 pContext->dcRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); 84 pContext->dsRing.Init(pContext->MAX_DRAWS_IN_FLIGHT); 85 86 pContext->pMacroTileManagerArray = (MacroTileMgr*)AlignedMalloc(sizeof(MacroTileMgr) * pContext->MAX_DRAWS_IN_FLIGHT, 64); 87 pContext->pDispatchQueueArray = (DispatchQueue*)AlignedMalloc(sizeof(DispatchQueue) * pContext->MAX_DRAWS_IN_FLIGHT, 64); 88 89 for (uint32_t dc = 0; dc < pContext->MAX_DRAWS_IN_FLIGHT; ++dc) 90 { 91 pContext->dcRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); 92 new (&pContext->pMacroTileManagerArray[dc]) MacroTileMgr(*pContext->dcRing[dc].pArena); 93 new (&pContext->pDispatchQueueArray[dc]) DispatchQueue(); 94 95 pContext->dsRing[dc].pArena = new CachingArena(pContext->cachingArenaAllocator); 96 } 97 98 if (pCreateInfo->pThreadInfo) 99 { 100 pContext->threadInfo = *pCreateInfo->pThreadInfo; 101 } 102 else 103 { 104 pContext->threadInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS; 105 pContext->threadInfo.BASE_NUMA_NODE = KNOB_BASE_NUMA_NODE; 106 pContext->threadInfo.BASE_CORE = KNOB_BASE_CORE; 107 pContext->threadInfo.BASE_THREAD = KNOB_BASE_THREAD; 108 pContext->threadInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES; 109 pContext->threadInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE; 110 pContext->threadInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE; 111 pContext->threadInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED; 112 } 113 114 if 
(pCreateInfo->pApiThreadInfo) 115 { 116 pContext->apiThreadInfo = *pCreateInfo->pApiThreadInfo; 117 } 118 else 119 { 120 pContext->apiThreadInfo.bindAPIThread0 = true; 121 pContext->apiThreadInfo.numAPIReservedThreads = 1; 122 pContext->apiThreadInfo.numAPIThreadsPerCore = 1; 123 } 124 125 memset(&pContext->WaitLock, 0, sizeof(pContext->WaitLock)); 126 memset(&pContext->FifosNotEmpty, 0, sizeof(pContext->FifosNotEmpty)); 127 new (&pContext->WaitLock) std::mutex(); 128 new (&pContext->FifosNotEmpty) std::condition_variable(); 129 130 CreateThreadPool(pContext, &pContext->threadPool); 131 132 if (pContext->apiThreadInfo.bindAPIThread0) 133 { 134 BindApiThread(pContext, 0); 135 } 136 137 pContext->ppScratch = new uint8_t*[pContext->NumWorkerThreads]; 138 pContext->pStats = (SWR_STATS*)AlignedMalloc(sizeof(SWR_STATS) * pContext->NumWorkerThreads, 64); 139 140 #if defined(KNOB_ENABLE_AR) 141 // Setup ArchRast thread contexts which includes +1 for API thread. 142 pContext->pArContext = new HANDLE[pContext->NumWorkerThreads+1]; 143 pContext->pArContext[pContext->NumWorkerThreads] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::API); 144 #endif 145 146 // Allocate scratch space for workers. 147 ///@note We could lazily allocate this but its rather small amount of memory. 148 for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) 149 { 150 #if defined(_WIN32) 151 uint32_t numaNode = pContext->threadPool.pThreadData ? 152 pContext->threadPool.pThreadData[i].numaId : 0; 153 pContext->ppScratch[i] = (uint8_t*)VirtualAllocExNuma( 154 GetCurrentProcess(), nullptr, 32 * sizeof(KILOBYTE), 155 MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE, 156 numaNode); 157 #else 158 pContext->ppScratch[i] = (uint8_t*)AlignedMalloc(32 * sizeof(KILOBYTE), KNOB_SIMD_WIDTH * 4); 159 #endif 160 161 #if defined(KNOB_ENABLE_AR) 162 // Initialize worker thread context for ArchRast. 
163 pContext->pArContext[i] = ArchRast::CreateThreadContext(ArchRast::AR_THREAD::WORKER); 164 #endif 165 } 166 167 #if defined(KNOB_ENABLE_AR) 168 // cache the API thread event manager, for use with sim layer 169 pCreateInfo->hArEventManager = pContext->pArContext[pContext->NumWorkerThreads + 1]; 170 #endif 171 172 // State setup AFTER context is fully initialized 173 SetupDefaultState(pContext); 174 175 // initialize hot tile manager 176 pContext->pHotTileMgr = new HotTileMgr(); 177 178 // initialize callback functions 179 pContext->pfnLoadTile = pCreateInfo->pfnLoadTile; 180 pContext->pfnStoreTile = pCreateInfo->pfnStoreTile; 181 pContext->pfnClearTile = pCreateInfo->pfnClearTile; 182 pContext->pfnUpdateSoWriteOffset = pCreateInfo->pfnUpdateSoWriteOffset; 183 pContext->pfnUpdateStats = pCreateInfo->pfnUpdateStats; 184 pContext->pfnUpdateStatsFE = pCreateInfo->pfnUpdateStatsFE; 185 186 187 // pass pointer to bucket manager back to caller 188 #ifdef KNOB_ENABLE_RDTSC 189 pCreateInfo->pBucketMgr = &gBucketMgr; 190 #endif 191 192 pCreateInfo->contextSaveSize = sizeof(API_STATE); 193 194 StartThreadPool(pContext, &pContext->threadPool); 195 196 return (HANDLE)pContext; 197 } 198 199 void CopyState(DRAW_STATE& dst, const DRAW_STATE& src) 200 { 201 memcpy(&dst.state, &src.state, sizeof(API_STATE)); 202 } 203 204 template<bool IsDraw> 205 void QueueWork(SWR_CONTEXT *pContext) 206 { 207 DRAW_CONTEXT* pDC = pContext->pCurDrawContext; 208 uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; 209 210 if (IsDraw) 211 { 212 pDC->pTileMgr = &pContext->pMacroTileManagerArray[dcIndex]; 213 pDC->pTileMgr->initialize(); 214 } 215 216 // Each worker thread looks at a DC for both FE and BE work at different times and so we 217 // multiply threadDone by 2. When the threadDone counter has reached 0 then all workers 218 // have moved past this DC. (i.e. Each worker has checked this DC for both FE and BE work and 219 // then moved on if all work is done.) 
220 pContext->pCurDrawContext->threadsDone = pContext->NumFEThreads + pContext->NumBEThreads; 221 222 if (IsDraw) 223 { 224 InterlockedIncrement(&pContext->drawsOutstandingFE); 225 } 226 227 _ReadWriteBarrier(); 228 { 229 std::unique_lock<std::mutex> lock(pContext->WaitLock); 230 pContext->dcRing.Enqueue(); 231 } 232 233 if (pContext->threadInfo.SINGLE_THREADED) 234 { 235 // flush denormals to 0 236 uint32_t mxcsr = _mm_getcsr(); 237 _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); 238 239 if (IsDraw) 240 { 241 uint32_t curDraw[2] = { pContext->pCurDrawContext->drawId, pContext->pCurDrawContext->drawId }; 242 WorkOnFifoFE(pContext, 0, curDraw[0]); 243 WorkOnFifoBE(pContext, 0, curDraw[1], pContext->singleThreadLockedTiles, 0, 0); 244 } 245 else 246 { 247 uint32_t curDispatch = pContext->pCurDrawContext->drawId; 248 WorkOnCompute(pContext, 0, curDispatch); 249 } 250 251 // Dequeue the work here, if not already done, since we're single threaded (i.e. no workers). 252 while (CompleteDrawContext(pContext, pContext->pCurDrawContext) > 0) {} 253 254 // restore csr 255 _mm_setcsr(mxcsr); 256 } 257 else 258 { 259 AR_API_BEGIN(APIDrawWakeAllThreads, pDC->drawId); 260 WakeAllThreads(pContext); 261 AR_API_END(APIDrawWakeAllThreads, 1); 262 } 263 264 // Set current draw context to NULL so that next state call forces a new draw context to be created and populated. 265 pContext->pPrevDrawContext = pContext->pCurDrawContext; 266 pContext->pCurDrawContext = nullptr; 267 } 268 269 INLINE void QueueDraw(SWR_CONTEXT* pContext) 270 { 271 QueueWork<true>(pContext); 272 } 273 274 INLINE void QueueDispatch(SWR_CONTEXT* pContext) 275 { 276 QueueWork<false>(pContext); 277 } 278 279 DRAW_CONTEXT* GetDrawContext(SWR_CONTEXT *pContext, bool isSplitDraw = false) 280 { 281 AR_API_BEGIN(APIGetDrawContext, 0); 282 // If current draw context is null then need to obtain a new draw context to use from ring. 
283 if (pContext->pCurDrawContext == nullptr) 284 { 285 // Need to wait for a free entry. 286 while (pContext->dcRing.IsFull()) 287 { 288 _mm_pause(); 289 } 290 291 uint64_t curDraw = pContext->dcRing.GetHead(); 292 uint32_t dcIndex = curDraw % pContext->MAX_DRAWS_IN_FLIGHT; 293 294 if ((pContext->frameCount - pContext->lastFrameChecked) > 2 || 295 (curDraw - pContext->lastDrawChecked) > 0x10000) 296 { 297 // Take this opportunity to clean-up old arena allocations 298 pContext->cachingArenaAllocator.FreeOldBlocks(); 299 300 pContext->lastFrameChecked = pContext->frameCount; 301 pContext->lastDrawChecked = curDraw; 302 } 303 304 DRAW_CONTEXT* pCurDrawContext = &pContext->dcRing[dcIndex]; 305 pContext->pCurDrawContext = pCurDrawContext; 306 307 // Assign next available entry in DS ring to this DC. 308 uint32_t dsIndex = pContext->curStateId % pContext->MAX_DRAWS_IN_FLIGHT; 309 pCurDrawContext->pState = &pContext->dsRing[dsIndex]; 310 311 // Copy previous state to current state. 312 if (pContext->pPrevDrawContext) 313 { 314 DRAW_CONTEXT* pPrevDrawContext = pContext->pPrevDrawContext; 315 316 // If we're splitting our draw then we can just use the same state from the previous 317 // draw. In this case, we won't increment the DS ring index so the next non-split 318 // draw can receive the state. 319 if (isSplitDraw == false) 320 { 321 CopyState(*pCurDrawContext->pState, *pPrevDrawContext->pState); 322 323 // Should have been cleaned up previously 324 SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true); 325 326 pCurDrawContext->pState->pPrivateState = nullptr; 327 328 pContext->curStateId++; // Progress state ring index forward. 329 } 330 else 331 { 332 // If its a split draw then just copy the state pointer over 333 // since its the same draw. 
334 pCurDrawContext->pState = pPrevDrawContext->pState; 335 SWR_ASSERT(pPrevDrawContext->cleanupState == false); 336 } 337 } 338 else 339 { 340 SWR_ASSERT(pCurDrawContext->pState->pArena->IsEmpty() == true); 341 pContext->curStateId++; // Progress state ring index forward. 342 } 343 344 SWR_ASSERT(pCurDrawContext->pArena->IsEmpty() == true); 345 346 // Reset dependency 347 pCurDrawContext->dependent = false; 348 pCurDrawContext->dependentFE = false; 349 350 pCurDrawContext->pContext = pContext; 351 pCurDrawContext->isCompute = false; // Dispatch has to set this to true. 352 353 pCurDrawContext->doneFE = false; 354 pCurDrawContext->FeLock = 0; 355 pCurDrawContext->threadsDone = 0; 356 pCurDrawContext->retireCallback.pfnCallbackFunc = nullptr; 357 358 pCurDrawContext->dynState.Reset(pContext->NumWorkerThreads); 359 360 // Assign unique drawId for this DC 361 pCurDrawContext->drawId = pContext->dcRing.GetHead(); 362 363 pCurDrawContext->cleanupState = true; 364 } 365 else 366 { 367 SWR_ASSERT(isSplitDraw == false, "Split draw should only be used when obtaining a new DC"); 368 } 369 370 AR_API_END(APIGetDrawContext, 0); 371 return pContext->pCurDrawContext; 372 } 373 374 API_STATE* GetDrawState(SWR_CONTEXT *pContext) 375 { 376 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 377 SWR_ASSERT(pDC->pState != nullptr); 378 379 return &pDC->pState->state; 380 } 381 382 void SwrDestroyContext(HANDLE hContext) 383 { 384 SWR_CONTEXT *pContext = GetContext(hContext); 385 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 386 387 pDC->FeWork.type = SHUTDOWN; 388 pDC->FeWork.pfnWork = ProcessShutdown; 389 390 //enqueue 391 QueueDraw(pContext); 392 393 DestroyThreadPool(pContext, &pContext->threadPool); 394 395 // free the fifos 396 for (uint32_t i = 0; i < pContext->MAX_DRAWS_IN_FLIGHT; ++i) 397 { 398 AlignedFree(pContext->dcRing[i].dynState.pStats); 399 delete pContext->dcRing[i].pArena; 400 delete pContext->dsRing[i].pArena; 401 pContext->pMacroTileManagerArray[i].~MacroTileMgr(); 402 
pContext->pDispatchQueueArray[i].~DispatchQueue(); 403 } 404 405 AlignedFree(pContext->pDispatchQueueArray); 406 AlignedFree(pContext->pMacroTileManagerArray); 407 408 // Free scratch space. 409 for (uint32_t i = 0; i < pContext->NumWorkerThreads; ++i) 410 { 411 #if defined(_WIN32) 412 VirtualFree(pContext->ppScratch[i], 0, MEM_RELEASE); 413 #else 414 AlignedFree(pContext->ppScratch[i]); 415 #endif 416 417 #if defined(KNOB_ENABLE_AR) 418 ArchRast::DestroyThreadContext(pContext->pArContext[i]); 419 #endif 420 } 421 422 delete[] pContext->ppScratch; 423 AlignedFree(pContext->pStats); 424 425 delete(pContext->pHotTileMgr); 426 427 pContext->~SWR_CONTEXT(); 428 AlignedFree(GetContext(hContext)); 429 } 430 431 void SwrBindApiThread(HANDLE hContext, uint32_t apiThreadId) 432 { 433 SWR_CONTEXT *pContext = GetContext(hContext); 434 BindApiThread(pContext, apiThreadId); 435 } 436 437 void SWR_API SwrSaveState( 438 HANDLE hContext, 439 void* pOutputStateBlock, 440 size_t memSize) 441 { 442 SWR_CONTEXT *pContext = GetContext(hContext); 443 auto pSrc = GetDrawState(pContext); 444 SWR_ASSERT(pOutputStateBlock && memSize >= sizeof(*pSrc)); 445 446 memcpy(pOutputStateBlock, pSrc, sizeof(*pSrc)); 447 } 448 449 void SWR_API SwrRestoreState( 450 HANDLE hContext, 451 const void* pStateBlock, 452 size_t memSize) 453 { 454 SWR_CONTEXT *pContext = GetContext(hContext); 455 auto pDst = GetDrawState(pContext); 456 SWR_ASSERT(pStateBlock && memSize >= sizeof(*pDst)); 457 458 memcpy(pDst, pStateBlock, sizeof(*pDst)); 459 } 460 461 void SetupDefaultState(SWR_CONTEXT *pContext) 462 { 463 API_STATE* pState = GetDrawState(pContext); 464 465 pState->rastState.cullMode = SWR_CULLMODE_NONE; 466 pState->rastState.frontWinding = SWR_FRONTWINDING_CCW; 467 468 pState->depthBoundsState.depthBoundsTestEnable = false; 469 pState->depthBoundsState.depthBoundsTestMinValue = 0.0f; 470 pState->depthBoundsState.depthBoundsTestMaxValue = 1.0f; 471 } 472 473 void SwrSync(HANDLE hContext, PFN_CALLBACK_FUNC 
pfnFunc, uint64_t userData, uint64_t userData2, uint64_t userData3) 474 { 475 SWR_ASSERT(pfnFunc != nullptr); 476 477 SWR_CONTEXT *pContext = GetContext(hContext); 478 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 479 480 AR_API_BEGIN(APISync, 0); 481 482 pDC->FeWork.type = SYNC; 483 pDC->FeWork.pfnWork = ProcessSync; 484 485 // Setup callback function 486 pDC->retireCallback.pfnCallbackFunc = pfnFunc; 487 pDC->retireCallback.userData = userData; 488 pDC->retireCallback.userData2 = userData2; 489 pDC->retireCallback.userData3 = userData3; 490 491 AR_API_EVENT(SwrSyncEvent(pDC->drawId)); 492 493 //enqueue 494 QueueDraw(pContext); 495 496 AR_API_END(APISync, 1); 497 } 498 499 void SwrStallBE(HANDLE hContext) 500 { 501 SWR_CONTEXT* pContext = GetContext(hContext); 502 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 503 504 pDC->dependent = true; 505 } 506 507 void SwrWaitForIdle(HANDLE hContext) 508 { 509 SWR_CONTEXT *pContext = GetContext(hContext); 510 511 AR_API_BEGIN(APIWaitForIdle, 0); 512 513 while (!pContext->dcRing.IsEmpty()) 514 { 515 _mm_pause(); 516 } 517 518 AR_API_END(APIWaitForIdle, 1); 519 } 520 521 void SwrWaitForIdleFE(HANDLE hContext) 522 { 523 SWR_CONTEXT *pContext = GetContext(hContext); 524 525 AR_API_BEGIN(APIWaitForIdle, 0); 526 527 while (pContext->drawsOutstandingFE > 0) 528 { 529 _mm_pause(); 530 } 531 532 AR_API_END(APIWaitForIdle, 1); 533 } 534 535 void SwrSetVertexBuffers( 536 HANDLE hContext, 537 uint32_t numBuffers, 538 const SWR_VERTEX_BUFFER_STATE* pVertexBuffers) 539 { 540 API_STATE* pState = GetDrawState(GetContext(hContext)); 541 542 for (uint32_t i = 0; i < numBuffers; ++i) 543 { 544 const SWR_VERTEX_BUFFER_STATE *pVB = &pVertexBuffers[i]; 545 pState->vertexBuffers[pVB->index] = *pVB; 546 } 547 } 548 549 void SwrSetIndexBuffer( 550 HANDLE hContext, 551 const SWR_INDEX_BUFFER_STATE* pIndexBuffer) 552 { 553 API_STATE* pState = GetDrawState(GetContext(hContext)); 554 555 pState->indexBuffer = *pIndexBuffer; 556 } 557 558 void 
SwrSetFetchFunc( 559 HANDLE hContext, 560 PFN_FETCH_FUNC pfnFetchFunc) 561 { 562 API_STATE* pState = GetDrawState(GetContext(hContext)); 563 564 pState->pfnFetchFunc = pfnFetchFunc; 565 } 566 567 void SwrSetSoFunc( 568 HANDLE hContext, 569 PFN_SO_FUNC pfnSoFunc, 570 uint32_t streamIndex) 571 { 572 API_STATE* pState = GetDrawState(GetContext(hContext)); 573 574 SWR_ASSERT(streamIndex < MAX_SO_STREAMS); 575 576 pState->pfnSoFunc[streamIndex] = pfnSoFunc; 577 } 578 579 void SwrSetSoState( 580 HANDLE hContext, 581 SWR_STREAMOUT_STATE* pSoState) 582 { 583 API_STATE* pState = GetDrawState(GetContext(hContext)); 584 585 pState->soState = *pSoState; 586 } 587 588 void SwrSetSoBuffers( 589 HANDLE hContext, 590 SWR_STREAMOUT_BUFFER* pSoBuffer, 591 uint32_t slot) 592 { 593 API_STATE* pState = GetDrawState(GetContext(hContext)); 594 595 SWR_ASSERT((slot < 4), "There are only 4 SO buffer slots [0, 3]\nSlot requested: %d", slot); 596 597 pState->soBuffer[slot] = *pSoBuffer; 598 } 599 600 void SwrSetVertexFunc( 601 HANDLE hContext, 602 PFN_VERTEX_FUNC pfnVertexFunc) 603 { 604 API_STATE* pState = GetDrawState(GetContext(hContext)); 605 606 pState->pfnVertexFunc = pfnVertexFunc; 607 } 608 609 void SwrSetFrontendState( 610 HANDLE hContext, 611 SWR_FRONTEND_STATE *pFEState) 612 { 613 API_STATE* pState = GetDrawState(GetContext(hContext)); 614 pState->frontendState = *pFEState; 615 } 616 617 void SwrSetGsState( 618 HANDLE hContext, 619 SWR_GS_STATE *pGSState) 620 { 621 API_STATE* pState = GetDrawState(GetContext(hContext)); 622 pState->gsState = *pGSState; 623 } 624 625 void SwrSetGsFunc( 626 HANDLE hContext, 627 PFN_GS_FUNC pfnGsFunc) 628 { 629 API_STATE* pState = GetDrawState(GetContext(hContext)); 630 pState->pfnGsFunc = pfnGsFunc; 631 } 632 633 void SwrSetCsFunc( 634 HANDLE hContext, 635 PFN_CS_FUNC pfnCsFunc, 636 uint32_t totalThreadsInGroup, 637 uint32_t totalSpillFillSize, 638 uint32_t scratchSpaceSizePerInstance, 639 uint32_t numInstances) 640 { 641 API_STATE* pState = 
GetDrawState(GetContext(hContext)); 642 pState->pfnCsFunc = pfnCsFunc; 643 pState->totalThreadsInGroup = totalThreadsInGroup; 644 pState->totalSpillFillSize = totalSpillFillSize; 645 pState->scratchSpaceSize = scratchSpaceSizePerInstance; 646 pState->scratchSpaceNumInstances = numInstances; 647 } 648 649 void SwrSetTsState( 650 HANDLE hContext, 651 SWR_TS_STATE *pState) 652 { 653 API_STATE* pApiState = GetDrawState(GetContext(hContext)); 654 pApiState->tsState = *pState; 655 } 656 657 void SwrSetHsFunc( 658 HANDLE hContext, 659 PFN_HS_FUNC pfnFunc) 660 { 661 API_STATE* pApiState = GetDrawState(GetContext(hContext)); 662 pApiState->pfnHsFunc = pfnFunc; 663 } 664 665 void SwrSetDsFunc( 666 HANDLE hContext, 667 PFN_DS_FUNC pfnFunc) 668 { 669 API_STATE* pApiState = GetDrawState(GetContext(hContext)); 670 pApiState->pfnDsFunc = pfnFunc; 671 } 672 673 void SwrSetDepthStencilState( 674 HANDLE hContext, 675 SWR_DEPTH_STENCIL_STATE *pDSState) 676 { 677 API_STATE* pState = GetDrawState(GetContext(hContext)); 678 679 pState->depthStencilState = *pDSState; 680 } 681 682 void SwrSetBackendState( 683 HANDLE hContext, 684 SWR_BACKEND_STATE *pBEState) 685 { 686 API_STATE* pState = GetDrawState(GetContext(hContext)); 687 688 pState->backendState = *pBEState; 689 } 690 691 void SwrSetDepthBoundsState( 692 HANDLE hContext, 693 SWR_DEPTH_BOUNDS_STATE *pDBState) 694 { 695 API_STATE* pState = GetDrawState(GetContext(hContext)); 696 697 pState->depthBoundsState = *pDBState; 698 } 699 700 void SwrSetPixelShaderState( 701 HANDLE hContext, 702 SWR_PS_STATE *pPSState) 703 { 704 API_STATE *pState = GetDrawState(GetContext(hContext)); 705 pState->psState = *pPSState; 706 } 707 708 void SwrSetBlendState( 709 HANDLE hContext, 710 SWR_BLEND_STATE *pBlendState) 711 { 712 API_STATE *pState = GetDrawState(GetContext(hContext)); 713 memcpy(&pState->blendState, pBlendState, sizeof(SWR_BLEND_STATE)); 714 } 715 716 void SwrSetBlendFunc( 717 HANDLE hContext, 718 uint32_t renderTarget, 719 
PFN_BLEND_JIT_FUNC pfnBlendFunc) 720 { 721 SWR_ASSERT(renderTarget < SWR_NUM_RENDERTARGETS); 722 API_STATE *pState = GetDrawState(GetContext(hContext)); 723 pState->pfnBlendFunc[renderTarget] = pfnBlendFunc; 724 } 725 726 // update guardband multipliers for the viewport 727 void updateGuardbands(API_STATE *pState) 728 { 729 uint32_t numGbs = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1; 730 731 for(uint32_t i = 0; i < numGbs; ++i) 732 { 733 // guardband center is viewport center 734 pState->gbState.left[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width; 735 pState->gbState.right[i] = KNOB_GUARDBAND_WIDTH / pState->vp[i].width; 736 pState->gbState.top[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height; 737 pState->gbState.bottom[i] = KNOB_GUARDBAND_HEIGHT / pState->vp[i].height; 738 } 739 } 740 741 void SwrSetRastState( 742 HANDLE hContext, 743 const SWR_RASTSTATE *pRastState) 744 { 745 SWR_CONTEXT *pContext = GetContext(hContext); 746 API_STATE* pState = GetDrawState(pContext); 747 748 memcpy(&pState->rastState, pRastState, sizeof(SWR_RASTSTATE)); 749 } 750 751 void SwrSetViewports( 752 HANDLE hContext, 753 uint32_t numViewports, 754 const SWR_VIEWPORT* pViewports, 755 const SWR_VIEWPORT_MATRICES* pMatrices) 756 { 757 SWR_ASSERT(numViewports <= KNOB_NUM_VIEWPORTS_SCISSORS, 758 "Invalid number of viewports."); 759 760 SWR_CONTEXT *pContext = GetContext(hContext); 761 API_STATE* pState = GetDrawState(pContext); 762 763 memcpy(&pState->vp[0], pViewports, sizeof(SWR_VIEWPORT) * numViewports); 764 // @todo Faster to copy portions of the SOA or just copy all of it? 
765 memcpy(&pState->vpMatrices, pMatrices, sizeof(SWR_VIEWPORT_MATRICES)); 766 767 updateGuardbands(pState); 768 } 769 770 void SwrSetScissorRects( 771 HANDLE hContext, 772 uint32_t numScissors, 773 const SWR_RECT* pScissors) 774 { 775 SWR_ASSERT(numScissors <= KNOB_NUM_VIEWPORTS_SCISSORS, 776 "Invalid number of scissor rects."); 777 778 API_STATE* pState = GetDrawState(GetContext(hContext)); 779 memcpy(&pState->scissorRects[0], pScissors, numScissors * sizeof(pScissors[0])); 780 }; 781 782 void SetupMacroTileScissors(DRAW_CONTEXT *pDC) 783 { 784 API_STATE *pState = &pDC->pState->state; 785 uint32_t numScissors = pState->backendState.readViewportArrayIndex ? KNOB_NUM_VIEWPORTS_SCISSORS : 1; 786 pState->scissorsTileAligned = true; 787 788 for (uint32_t index = 0; index < numScissors; ++index) 789 { 790 SWR_RECT &scissorInFixedPoint = pState->scissorsInFixedPoint[index]; 791 792 // Set up scissor dimensions based on scissor or viewport 793 if (pState->rastState.scissorEnable) 794 { 795 scissorInFixedPoint = pState->scissorRects[index]; 796 } 797 else 798 { 799 // the vp width and height must be added to origin un-rounded then the result round to -inf. 800 // The cast to int works for rounding assuming all [left, right, top, bottom] are positive. 
801 scissorInFixedPoint.xmin = (int32_t)pState->vp[index].x; 802 scissorInFixedPoint.xmax = (int32_t)(pState->vp[index].x + pState->vp[index].width); 803 scissorInFixedPoint.ymin = (int32_t)pState->vp[index].y; 804 scissorInFixedPoint.ymax = (int32_t)(pState->vp[index].y + pState->vp[index].height); 805 } 806 807 // Clamp to max rect 808 scissorInFixedPoint &= g_MaxScissorRect; 809 810 // Test for tile alignment 811 bool tileAligned; 812 tileAligned = (scissorInFixedPoint.xmin % KNOB_TILE_X_DIM) == 0; 813 tileAligned &= (scissorInFixedPoint.ymin % KNOB_TILE_Y_DIM) == 0; 814 tileAligned &= (scissorInFixedPoint.xmax % KNOB_TILE_X_DIM) == 0; 815 tileAligned &= (scissorInFixedPoint.ymax % KNOB_TILE_Y_DIM) == 0; 816 817 pState->scissorsTileAligned &= tileAligned; 818 819 // Scale to fixed point 820 scissorInFixedPoint.xmin *= FIXED_POINT_SCALE; 821 scissorInFixedPoint.xmax *= FIXED_POINT_SCALE; 822 scissorInFixedPoint.ymin *= FIXED_POINT_SCALE; 823 scissorInFixedPoint.ymax *= FIXED_POINT_SCALE; 824 825 // Make scissor inclusive 826 scissorInFixedPoint.xmax -= 1; 827 scissorInFixedPoint.ymax -= 1; 828 } 829 } 830 831 832 // templated backend function tables 833 834 void SetupPipeline(DRAW_CONTEXT *pDC) 835 { 836 DRAW_STATE* pState = pDC->pState; 837 const SWR_RASTSTATE &rastState = pState->state.rastState; 838 const SWR_PS_STATE &psState = pState->state.psState; 839 BACKEND_FUNCS& backendFuncs = pState->backendFuncs; 840 841 // setup backend 842 if (psState.pfnPixelShader == nullptr) 843 { 844 backendFuncs.pfnBackend = gBackendNullPs[pState->state.rastState.sampleCount]; 845 } 846 else 847 { 848 const uint32_t forcedSampleCount = (rastState.forcedSampleCount) ? 1 : 0; 849 const bool bMultisampleEnable = ((rastState.sampleCount > SWR_MULTISAMPLE_1X) || forcedSampleCount) ? 1 : 0; 850 const uint32_t centroid = ((psState.barycentricsMask & SWR_BARYCENTRIC_CENTROID_MASK) > 0) ? 
1 : 0; 851 const uint32_t canEarlyZ = (psState.forceEarlyZ || (!psState.writesODepth && !psState.usesUAV)) ? 1 : 0; 852 SWR_BARYCENTRICS_MASK barycentricsMask = (SWR_BARYCENTRICS_MASK)psState.barycentricsMask; 853 854 // select backend function 855 switch(psState.shadingRate) 856 { 857 case SWR_SHADING_RATE_PIXEL: 858 if(bMultisampleEnable) 859 { 860 // always need to generate I & J per sample for Z interpolation 861 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); 862 backendFuncs.pfnBackend = gBackendPixelRateTable[rastState.sampleCount][rastState.bIsCenterPattern][psState.inputCoverage] 863 [centroid][forcedSampleCount][canEarlyZ] 864 ; 865 } 866 else 867 { 868 // always need to generate I & J per pixel for Z interpolation 869 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_PIXEL_MASK); 870 backendFuncs.pfnBackend = gBackendSingleSample[psState.inputCoverage][centroid][canEarlyZ]; 871 } 872 break; 873 case SWR_SHADING_RATE_SAMPLE: 874 SWR_ASSERT(rastState.bIsCenterPattern != true); 875 // always need to generate I & J per sample for Z interpolation 876 barycentricsMask = (SWR_BARYCENTRICS_MASK)(barycentricsMask | SWR_BARYCENTRIC_PER_SAMPLE_MASK); 877 backendFuncs.pfnBackend = gBackendSampleRateTable[rastState.sampleCount][psState.inputCoverage][centroid][canEarlyZ]; 878 break; 879 default: 880 SWR_ASSERT(0 && "Invalid shading rate"); 881 break; 882 } 883 } 884 885 SWR_ASSERT(backendFuncs.pfnBackend); 886 887 PFN_PROCESS_PRIMS pfnBinner; 888 #if USE_SIMD16_FRONTEND 889 PFN_PROCESS_PRIMS_SIMD16 pfnBinner_simd16; 890 #endif 891 switch (pState->state.topology) 892 { 893 case TOP_POINT_LIST: 894 pState->pfnProcessPrims = ClipPoints; 895 pfnBinner = BinPoints; 896 #if USE_SIMD16_FRONTEND 897 pState->pfnProcessPrims_simd16 = ClipPoints_simd16; 898 pfnBinner_simd16 = BinPoints_simd16; 899 #endif 900 break; 901 case TOP_LINE_LIST: 902 case TOP_LINE_STRIP: 903 case TOP_LINE_LOOP: 904 case 
TOP_LINE_LIST_ADJ: 905 case TOP_LISTSTRIP_ADJ: 906 pState->pfnProcessPrims = ClipLines; 907 pfnBinner = BinLines; 908 #if USE_SIMD16_FRONTEND 909 pState->pfnProcessPrims_simd16 = ClipLines_simd16; 910 pfnBinner_simd16 = BinLines_simd16; 911 #endif 912 break; 913 default: 914 pState->pfnProcessPrims = ClipTriangles; 915 pfnBinner = GetBinTrianglesFunc((rastState.conservativeRast > 0)); 916 #if USE_SIMD16_FRONTEND 917 pState->pfnProcessPrims_simd16 = ClipTriangles_simd16; 918 pfnBinner_simd16 = GetBinTrianglesFunc_simd16((rastState.conservativeRast > 0)); 919 #endif 920 break; 921 }; 922 923 924 // disable clipper if viewport transform is disabled 925 if (pState->state.frontendState.vpTransformDisable) 926 { 927 pState->pfnProcessPrims = pfnBinner; 928 #if USE_SIMD16_FRONTEND 929 pState->pfnProcessPrims_simd16 = pfnBinner_simd16; 930 #endif 931 } 932 933 if ((pState->state.psState.pfnPixelShader == nullptr) && 934 (pState->state.depthStencilState.depthTestEnable == FALSE) && 935 (pState->state.depthStencilState.depthWriteEnable == FALSE) && 936 (pState->state.depthStencilState.stencilTestEnable == FALSE) && 937 (pState->state.depthStencilState.stencilWriteEnable == FALSE) && 938 (pState->state.backendState.numAttributes == 0)) 939 { 940 pState->pfnProcessPrims = nullptr; 941 #if USE_SIMD16_FRONTEND 942 pState->pfnProcessPrims_simd16 = nullptr; 943 #endif 944 } 945 946 if (pState->state.soState.rasterizerDisable == true) 947 { 948 pState->pfnProcessPrims = nullptr; 949 #if USE_SIMD16_FRONTEND 950 pState->pfnProcessPrims_simd16 = nullptr; 951 #endif 952 } 953 954 955 // set up the frontend attribute count 956 pState->state.feNumAttributes = 0; 957 const SWR_BACKEND_STATE& backendState = pState->state.backendState; 958 if (backendState.swizzleEnable) 959 { 960 // attribute swizzling is enabled, iterate over the map and record the max attribute used 961 for (uint32_t i = 0; i < backendState.numAttributes; ++i) 962 { 963 pState->state.feNumAttributes = 
std::max(pState->state.feNumAttributes, (uint32_t)backendState.swizzleMap[i].sourceAttrib + 1); 964 } 965 } 966 else 967 { 968 pState->state.feNumAttributes = pState->state.backendState.numAttributes; 969 } 970 971 if (pState->state.soState.soEnable) 972 { 973 uint32_t streamMasks = 0; 974 for (uint32_t i = 0; i < 4; ++i) 975 { 976 streamMasks |= pState->state.soState.streamMasks[i]; 977 } 978 979 DWORD maxAttrib; 980 if (_BitScanReverse(&maxAttrib, streamMasks)) 981 { 982 pState->state.feNumAttributes = std::max(pState->state.feNumAttributes, (uint32_t)(maxAttrib + 1)); 983 } 984 } 985 986 // complicated logic to test for cases where we don't need backing hottile memory for a draw 987 // have to check for the special case where depth/stencil test is enabled but depthwrite is disabled. 988 pState->state.depthHottileEnable = ((!(pState->state.depthStencilState.depthTestEnable && 989 !pState->state.depthStencilState.depthWriteEnable && 990 !pState->state.depthBoundsState.depthBoundsTestEnable && 991 pState->state.depthStencilState.depthTestFunc == ZFUNC_ALWAYS)) && 992 (pState->state.depthStencilState.depthTestEnable || 993 pState->state.depthStencilState.depthWriteEnable || 994 pState->state.depthBoundsState.depthBoundsTestEnable)) ? true : false; 995 996 pState->state.stencilHottileEnable = (((!(pState->state.depthStencilState.stencilTestEnable && 997 !pState->state.depthStencilState.stencilWriteEnable && 998 pState->state.depthStencilState.stencilTestFunc == ZFUNC_ALWAYS)) || 999 // for stencil we have to check the double sided state as well 1000 (!(pState->state.depthStencilState.doubleSidedStencilTestEnable && 1001 !pState->state.depthStencilState.stencilWriteEnable && 1002 pState->state.depthStencilState.backfaceStencilTestFunc == ZFUNC_ALWAYS))) && 1003 (pState->state.depthStencilState.stencilTestEnable || 1004 pState->state.depthStencilState.stencilWriteEnable)) ? 
true : false;


    uint32_t hotTileEnable = pState->state.psState.renderTargetMask;

    // Disable hottile for surfaces with no writes
    if (psState.pfnPixelShader != nullptr)
    {
        DWORD rt;
        uint32_t rtMask = pState->state.psState.renderTargetMask;
        while (_BitScanForward(&rt, rtMask))
        {
            rtMask &= ~(1 << rt);

            if (pState->state.blendState.renderTarget[rt].writeDisableAlpha &&
                pState->state.blendState.renderTarget[rt].writeDisableRed &&
                pState->state.blendState.renderTarget[rt].writeDisableGreen &&
                pState->state.blendState.renderTarget[rt].writeDisableBlue)
            {
                hotTileEnable &= ~(1 << rt);
            }
        }
    }

    pState->state.colorHottileEnable = hotTileEnable;


    // Setup depth quantization function
    if (pState->state.depthHottileEnable)
    {
        switch (pState->state.rastState.depthFormat)
        {
        case R32_FLOAT_X8X24_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT_X8X24_TYPELESS > ; break;
        case R32_FLOAT: pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ; break;
        case R24_UNORM_X8_TYPELESS: pState->state.pfnQuantizeDepth = QuantizeDepth < R24_UNORM_X8_TYPELESS > ; break;
        case R16_UNORM: pState->state.pfnQuantizeDepth = QuantizeDepth < R16_UNORM > ; break;
        default: SWR_INVALID("Unsupported depth format for depth quantiztion.");
            pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
        }
    }
    else
    {
        // set up pass-through quantize if depth isn't enabled
        pState->state.pfnQuantizeDepth = QuantizeDepth < R32_FLOAT > ;
    }
}

//////////////////////////////////////////////////////////////////////////
/// @brief InitDraw - Prepares a draw context before its work is queued.
///        For a non-split draw this runs SetupMacroTileScissors and
///        SetupPipeline on the context; for a split draw it does nothing.
/// @param pDC - Draw context to initialize for this draw.
/// @param isSplitDraw - true when this context is a continuation chunk of a
///        draw that was split up; the scissor/pipeline setup is skipped then.
void InitDraw(
    DRAW_CONTEXT *pDC,
    bool isSplitDraw)
{
    // We don't need to re-setup the scissors/pipeline state again for split draw.
    if (isSplitDraw == false)
    {
        SetupMacroTileScissors(pDC);
        SetupPipeline(pDC);
    }


}

//////////////////////////////////////////////////////////////////////////
/// @brief We can split the draw for certain topologies for better performance.
///        Returns the maximum number of vertices (or indices) a single
///        queued draw should consume; callers loop until all are submitted.
/// @param pDC - Draw context whose state (stream-out / tessellation enables) is consulted
/// @param totalVerts - Total vertices for draw
/// @param topology - Topology used for draw
/// @return totalVerts when the draw is not split, otherwise the per-draw limit
uint32_t MaxVertsPerDraw(
    DRAW_CONTEXT* pDC,
    uint32_t totalVerts,
    PRIMITIVE_TOPOLOGY topology)
{
    API_STATE& state = pDC->pState->state;

    uint32_t vertsPerDraw = totalVerts;

    // Draws are never split while stream-out is enabled.
    if (state.soState.soEnable)
    {
        return totalVerts;
    }

    switch (topology)
    {
    case TOP_POINT_LIST:
    case TOP_TRIANGLE_LIST:
        // NOTE(review): the knob is named in prims but is used here as a vertex count - confirm intent.
        vertsPerDraw = KNOB_MAX_PRIMS_PER_DRAW;
        break;

    case TOP_PATCHLIST_1:
    case TOP_PATCHLIST_2:
    case TOP_PATCHLIST_3:
    case TOP_PATCHLIST_4:
    case TOP_PATCHLIST_5:
    case TOP_PATCHLIST_6:
    case TOP_PATCHLIST_7:
    case TOP_PATCHLIST_8:
    case TOP_PATCHLIST_9:
    case TOP_PATCHLIST_10:
    case TOP_PATCHLIST_11:
    case TOP_PATCHLIST_12:
    case TOP_PATCHLIST_13:
    case TOP_PATCHLIST_14:
    case TOP_PATCHLIST_15:
    case TOP_PATCHLIST_16:
    case TOP_PATCHLIST_17:
    case TOP_PATCHLIST_18:
    case TOP_PATCHLIST_19:
    case TOP_PATCHLIST_20:
    case TOP_PATCHLIST_21:
    case TOP_PATCHLIST_22:
    case TOP_PATCHLIST_23:
    case TOP_PATCHLIST_24:
    case TOP_PATCHLIST_25:
    case TOP_PATCHLIST_26:
    case TOP_PATCHLIST_27:
    case TOP_PATCHLIST_28:
    case TOP_PATCHLIST_29:
    case TOP_PATCHLIST_30:
    case TOP_PATCHLIST_31:
    case TOP_PATCHLIST_32:
        // Patch lists are only split when tessellation is enabled; the limit
        // is a whole number of patches (control points per patch * max patches).
        if (pDC->pState->state.tsState.tsEnable)
        {
            uint32_t vertsPerPrim = topology - TOP_PATCHLIST_BASE;
            vertsPerDraw = vertsPerPrim * KNOB_MAX_TESS_PRIMS_PER_DRAW;
        }
        break;

    // The Primitive Assembly code can only handle 1 RECT at a time.
    case TOP_RECT_LIST:
        vertsPerDraw = 3;
        break;

    default:
        // We are not splitting up draws for other topologies.
        break;
    }

    return vertsPerDraw;
}


//////////////////////////////////////////////////////////////////////////
/// @brief DrawInstanced - Queues a non-indexed draw, splitting it into
///        several draw contexts when MaxVertsPerDraw returns a limit
///        smaller than numVertices.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
/// @param numVertices - How many vertices to read sequentially from vertex data (per instance).
/// @param startVertex - Specifies start vertex for draw. (vertex data)
/// @param numInstances - How many instances to render.
/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
void DrawInstanced(
    HANDLE hContext,
    PRIMITIVE_TOPOLOGY topology,
    uint32_t numVertices,
    uint32_t startVertex,
    uint32_t numInstances = 1,
    uint32_t startInstance = 0)
{
    // The whole draw is dropped when the toss-draw knob is set.
    if (KNOB_TOSS_DRAW)
    {
        return;
    }

    SWR_CONTEXT *pContext = GetContext(hContext);
    DRAW_CONTEXT* pDC = GetDrawContext(pContext);

    AR_API_BEGIN(APIDraw, pDC->drawId);
    AR_API_EVENT(DrawInstancedEvent(pDC->drawId, topology, numVertices, startVertex, numInstances, startInstance));

    uint32_t maxVertsPerDraw = MaxVertsPerDraw(pDC, numVertices, topology);
    uint32_t primsPerDraw = GetNumPrims(topology, maxVertsPerDraw);
    uint32_t remainingVerts = numVertices;

    // Topology and forceFront are written into the API state before the loop,
    // i.e. before InitDraw is called for the first chunk.
    API_STATE *pState = &pDC->pState->state;
    pState->topology = topology;
    pState->forceFront = false;

    // disable culling for points/rects (points are additionally forced front-facing)
    uint32_t oldCullMode = pState->rastState.cullMode;
    if (topology == TOP_POINT_LIST)
    {
        pState->rastState.cullMode = SWR_CULLMODE_NONE;
        pState->forceFront = true;
    }
    else if (topology == TOP_RECT_LIST)
    {
        pState->rastState.cullMode = SWR_CULLMODE_NONE;
    }

    // 'draw' counts the chunks queued so far; chunk 0 is the original draw,
    // every later chunk is treated as a split draw.
    int draw = 0;
    while (remainingVerts)
    {
        uint32_t numVertsForDraw = (remainingVerts < maxVertsPerDraw) ?
            remainingVerts : maxVertsPerDraw;

        bool isSplitDraw = (draw > 0) ? true : false;
        // NOTE(review): this declaration shadows the outer pDC; DrawIndexedInstance
        // assigns to its outer variable instead. The outer pDC is re-fetched after the loop.
        DRAW_CONTEXT* pDC = GetDrawContext(pContext, isSplitDraw);
        InitDraw(pDC, isSplitDraw);

        pDC->FeWork.type = DRAW;
        pDC->FeWork.pfnWork = GetProcessDrawFunc(
            false, // IsIndexed
            false, // bEnableCutIndex
            pState->tsState.tsEnable,
            pState->gsState.gsEnable,
            pState->soState.soEnable,
            pDC->pState->pfnProcessPrims != nullptr);
        pDC->FeWork.desc.draw.numVerts = numVertsForDraw;
        pDC->FeWork.desc.draw.startVertex = startVertex;
        pDC->FeWork.desc.draw.numInstances = numInstances;
        pDC->FeWork.desc.draw.startInstance = startInstance;
        // Each chunk records the primitive / vertex ID at which it starts.
        pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw;
        pDC->FeWork.desc.draw.startVertexID = draw * maxVertsPerDraw;

        // cleanupState is set only on the chunk that consumes the last vertices.
        pDC->cleanupState = (remainingVerts == numVertsForDraw);

        //enqueue DC
        QueueDraw(pContext);

        AR_API_EVENT(DrawInstancedSplitEvent(pDC->drawId));

        remainingVerts -= numVertsForDraw;
        draw++;
    }

    // restore culling state
    pDC = GetDrawContext(pContext);
    pDC->pState->state.rastState.cullMode = oldCullMode;

    AR_API_END(APIDraw, numVertices * numInstances);
}

//////////////////////////////////////////////////////////////////////////
/// @brief SwrDraw - Non-indexed draw; forwards to DrawInstanced.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
/// @param startVertex - Specifies start vertex in vertex buffer for draw.
/// @param numVertices - Number of vertices.
void SwrDraw(
    HANDLE hContext,
    PRIMITIVE_TOPOLOGY topology,
    uint32_t startVertex,
    uint32_t numVertices)
{
    // Note the argument order swap: DrawInstanced takes (numVertices, startVertex).
    // numInstances / startInstance are left to DrawInstanced's defaults (1 and 0).
    DrawInstanced(hContext, topology, numVertices, startVertex);
}

//////////////////////////////////////////////////////////////////////////
/// @brief SwrDrawInstanced - Non-indexed instanced draw; forwards to DrawInstanced.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
/// @param numVertsPerInstance - How many vertices to read sequentially from vertex data.
/// @param numInstances - How many instances to render.
/// @param startVertex - Specifies start vertex for draw. (vertex data)
/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
void SwrDrawInstanced(
    HANDLE hContext,
    PRIMITIVE_TOPOLOGY topology,
    uint32_t numVertsPerInstance,
    uint32_t numInstances,
    uint32_t startVertex,
    uint32_t startInstance
    )
{
    // The public parameter order differs from DrawInstanced's, so the
    // arguments are reordered here: (numVerts, startVertex, numInstances, startInstance).
    DrawInstanced(hContext, topology, numVertsPerInstance, startVertex, numInstances, startInstance);
}

//////////////////////////////////////////////////////////////////////////
/// @brief DrawIndexedInstance - Queues an indexed draw, splitting it into
///        several draw contexts when needed.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
/// @param numIndices - Number of indices to read sequentially from index buffer.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
/// @param numInstances - Number of instances to render.
1279 /// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data) 1280 void DrawIndexedInstance( 1281 HANDLE hContext, 1282 PRIMITIVE_TOPOLOGY topology, 1283 uint32_t numIndices, 1284 uint32_t indexOffset, 1285 int32_t baseVertex, 1286 uint32_t numInstances = 1, 1287 uint32_t startInstance = 0) 1288 { 1289 if (KNOB_TOSS_DRAW) 1290 { 1291 return; 1292 } 1293 1294 SWR_CONTEXT *pContext = GetContext(hContext); 1295 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1296 API_STATE* pState = &pDC->pState->state; 1297 1298 AR_API_BEGIN(APIDrawIndexed, pDC->drawId); 1299 AR_API_EVENT(DrawIndexedInstancedEvent(pDC->drawId, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance)); 1300 1301 uint32_t maxIndicesPerDraw = MaxVertsPerDraw(pDC, numIndices, topology); 1302 uint32_t primsPerDraw = GetNumPrims(topology, maxIndicesPerDraw); 1303 uint32_t remainingIndices = numIndices; 1304 1305 uint32_t indexSize = 0; 1306 switch (pState->indexBuffer.format) 1307 { 1308 case R32_UINT: indexSize = sizeof(uint32_t); break; 1309 case R16_UINT: indexSize = sizeof(uint16_t); break; 1310 case R8_UINT: indexSize = sizeof(uint8_t); break; 1311 default: 1312 SWR_INVALID("Invalid index buffer format: %d", pState->indexBuffer.format); 1313 } 1314 1315 int draw = 0; 1316 uint8_t *pIB = (uint8_t*)pState->indexBuffer.pIndices; 1317 pIB += (uint64_t)indexOffset * (uint64_t)indexSize; 1318 1319 pState->topology = topology; 1320 pState->forceFront = false; 1321 1322 // disable culling for points/lines 1323 uint32_t oldCullMode = pState->rastState.cullMode; 1324 if (topology == TOP_POINT_LIST) 1325 { 1326 pState->rastState.cullMode = SWR_CULLMODE_NONE; 1327 pState->forceFront = true; 1328 } 1329 else if (topology == TOP_RECT_LIST) 1330 { 1331 pState->rastState.cullMode = SWR_CULLMODE_NONE; 1332 } 1333 1334 while (remainingIndices) 1335 { 1336 uint32_t numIndicesForDraw = (remainingIndices < maxIndicesPerDraw) ? 
1337 remainingIndices : maxIndicesPerDraw; 1338 1339 // When breaking up draw, we need to obtain new draw context for each iteration. 1340 bool isSplitDraw = (draw > 0) ? true : false; 1341 1342 pDC = GetDrawContext(pContext, isSplitDraw); 1343 InitDraw(pDC, isSplitDraw); 1344 1345 pDC->FeWork.type = DRAW; 1346 pDC->FeWork.pfnWork = GetProcessDrawFunc( 1347 true, // IsIndexed 1348 pState->frontendState.bEnableCutIndex, 1349 pState->tsState.tsEnable, 1350 pState->gsState.gsEnable, 1351 pState->soState.soEnable, 1352 pDC->pState->pfnProcessPrims != nullptr); 1353 pDC->FeWork.desc.draw.pDC = pDC; 1354 pDC->FeWork.desc.draw.numIndices = numIndicesForDraw; 1355 pDC->FeWork.desc.draw.pIB = (int*)pIB; 1356 pDC->FeWork.desc.draw.type = pDC->pState->state.indexBuffer.format; 1357 1358 pDC->FeWork.desc.draw.numInstances = numInstances; 1359 pDC->FeWork.desc.draw.startInstance = startInstance; 1360 pDC->FeWork.desc.draw.baseVertex = baseVertex; 1361 pDC->FeWork.desc.draw.startPrimID = draw * primsPerDraw; 1362 1363 pDC->cleanupState = (remainingIndices == numIndicesForDraw); 1364 1365 //enqueue DC 1366 QueueDraw(pContext); 1367 1368 AR_API_EVENT(DrawIndexedInstancedSplitEvent(pDC->drawId)); 1369 1370 pIB += maxIndicesPerDraw * indexSize; 1371 remainingIndices -= numIndicesForDraw; 1372 draw++; 1373 } 1374 1375 // Restore culling state 1376 pDC = GetDrawContext(pContext); 1377 pDC->pState->state.rastState.cullMode = oldCullMode; 1378 1379 AR_API_END(APIDrawIndexed, numIndices * numInstances); 1380 } 1381 1382 1383 ////////////////////////////////////////////////////////////////////////// 1384 /// @brief DrawIndexed 1385 /// @param hContext - Handle passed back from SwrCreateContext 1386 /// @param topology - Specifies topology for draw. 1387 /// @param numIndices - Number of indices to read sequentially from index buffer. 1388 /// @param indexOffset - Starting index into index buffer. 1389 /// @param baseVertex - Vertex in vertex buffer to consider as index "0". 
///        Note value is signed.
void SwrDrawIndexed(
    HANDLE hContext,
    PRIMITIVE_TOPOLOGY topology,
    uint32_t numIndices,
    uint32_t indexOffset,
    int32_t baseVertex
    )
{
    // Single-instance indexed draw: numInstances / startInstance are left to
    // DrawIndexedInstance's defaults (1 and 0).
    DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex);
}

//////////////////////////////////////////////////////////////////////////
/// @brief SwrDrawIndexedInstanced - Indexed instanced draw; forwards to DrawIndexedInstance.
/// @param hContext - Handle passed back from SwrCreateContext
/// @param topology - Specifies topology for draw.
/// @param numIndices - Number of indices to read sequentially from index buffer.
/// @param numInstances - Number of instances to render.
/// @param indexOffset - Starting index into index buffer.
/// @param baseVertex - Vertex in vertex buffer to consider as index "0". Note value is signed.
/// @param startInstance - Which instance to start sequentially fetching from in each buffer (instanced data)
void SwrDrawIndexedInstanced(
    HANDLE hContext,
    PRIMITIVE_TOPOLOGY topology,
    uint32_t numIndices,
    uint32_t numInstances,
    uint32_t indexOffset,
    int32_t baseVertex,
    uint32_t startInstance)
{
    // numInstances sits earlier in this public signature than in
    // DrawIndexedInstance, so the arguments are reordered on the way through.
    DrawIndexedInstance(hContext, topology, numIndices, indexOffset, baseVertex, numInstances, startInstance);
}

//////////////////////////////////////////////////////////////////////////
/// @brief SwrInvalidateTiles
/// @param hContext - Handle passed back from SwrCreateContext
/// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to invalidate.
/// @param invalidateRect - The pixel-coordinate rectangle to invalidate.  This will be expanded to
///                         be hottile size-aligned.
1428 void SWR_API SwrInvalidateTiles( 1429 HANDLE hContext, 1430 uint32_t attachmentMask, 1431 const SWR_RECT& invalidateRect) 1432 { 1433 if (KNOB_TOSS_DRAW) 1434 { 1435 return; 1436 } 1437 1438 SWR_CONTEXT *pContext = GetContext(hContext); 1439 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1440 1441 pDC->FeWork.type = DISCARDINVALIDATETILES; 1442 pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; 1443 pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; 1444 pDC->FeWork.desc.discardInvalidateTiles.rect = invalidateRect; 1445 pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; 1446 pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_INVALID; 1447 pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = false; 1448 pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = false; 1449 1450 //enqueue 1451 QueueDraw(pContext); 1452 1453 AR_API_EVENT(SwrInvalidateTilesEvent(pDC->drawId)); 1454 } 1455 1456 ////////////////////////////////////////////////////////////////////////// 1457 /// @brief SwrDiscardRect 1458 /// @param hContext - Handle passed back from SwrCreateContext 1459 /// @param attachmentMask - The mask specifies which surfaces attached to the hottiles to discard. 1460 /// @param rect - The pixel-coordinate rectangle to discard. Only fully-covered hottiles will be 1461 /// discarded. 
1462 void SWR_API SwrDiscardRect( 1463 HANDLE hContext, 1464 uint32_t attachmentMask, 1465 const SWR_RECT& rect) 1466 { 1467 if (KNOB_TOSS_DRAW) 1468 { 1469 return; 1470 } 1471 1472 SWR_CONTEXT *pContext = GetContext(hContext); 1473 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1474 1475 // Queue a load to the hottile 1476 pDC->FeWork.type = DISCARDINVALIDATETILES; 1477 pDC->FeWork.pfnWork = ProcessDiscardInvalidateTiles; 1478 pDC->FeWork.desc.discardInvalidateTiles.attachmentMask = attachmentMask; 1479 pDC->FeWork.desc.discardInvalidateTiles.rect = rect; 1480 pDC->FeWork.desc.discardInvalidateTiles.rect &= g_MaxScissorRect; 1481 pDC->FeWork.desc.discardInvalidateTiles.newTileState = SWR_TILE_RESOLVED; 1482 pDC->FeWork.desc.discardInvalidateTiles.createNewTiles = true; 1483 pDC->FeWork.desc.discardInvalidateTiles.fullTilesOnly = true; 1484 1485 //enqueue 1486 QueueDraw(pContext); 1487 1488 AR_API_EVENT(SwrDiscardRectEvent(pDC->drawId)); 1489 } 1490 1491 ////////////////////////////////////////////////////////////////////////// 1492 /// @brief SwrDispatch 1493 /// @param hContext - Handle passed back from SwrCreateContext 1494 /// @param threadGroupCountX - Number of thread groups dispatched in X direction 1495 /// @param threadGroupCountY - Number of thread groups dispatched in Y direction 1496 /// @param threadGroupCountZ - Number of thread groups dispatched in Z direction 1497 void SwrDispatch( 1498 HANDLE hContext, 1499 uint32_t threadGroupCountX, 1500 uint32_t threadGroupCountY, 1501 uint32_t threadGroupCountZ) 1502 { 1503 if (KNOB_TOSS_DRAW) 1504 { 1505 return; 1506 } 1507 1508 SWR_CONTEXT *pContext = GetContext(hContext); 1509 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1510 1511 AR_API_BEGIN(APIDispatch, pDC->drawId); 1512 AR_API_EVENT(DispatchEvent(pDC->drawId, threadGroupCountX, threadGroupCountY, threadGroupCountZ)); 1513 pDC->isCompute = true; // This is a compute context. 
1514 1515 COMPUTE_DESC* pTaskData = (COMPUTE_DESC*)pDC->pArena->AllocAligned(sizeof(COMPUTE_DESC), 64); 1516 1517 pTaskData->threadGroupCountX = threadGroupCountX; 1518 pTaskData->threadGroupCountY = threadGroupCountY; 1519 pTaskData->threadGroupCountZ = threadGroupCountZ; 1520 1521 uint32_t totalThreadGroups = threadGroupCountX * threadGroupCountY * threadGroupCountZ; 1522 uint32_t dcIndex = pDC->drawId % pContext->MAX_DRAWS_IN_FLIGHT; 1523 pDC->pDispatch = &pContext->pDispatchQueueArray[dcIndex]; 1524 pDC->pDispatch->initialize(totalThreadGroups, pTaskData, &ProcessComputeBE); 1525 1526 QueueDispatch(pContext); 1527 AR_API_END(APIDispatch, threadGroupCountX * threadGroupCountY * threadGroupCountZ); 1528 } 1529 1530 // Deswizzles, converts and stores current contents of the hot tiles to surface 1531 // described by pState 1532 void SWR_API SwrStoreTiles( 1533 HANDLE hContext, 1534 uint32_t attachmentMask, 1535 SWR_TILE_STATE postStoreTileState, 1536 const SWR_RECT& storeRect) 1537 { 1538 if (KNOB_TOSS_DRAW) 1539 { 1540 return; 1541 } 1542 1543 SWR_CONTEXT *pContext = GetContext(hContext); 1544 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1545 1546 AR_API_BEGIN(APIStoreTiles, pDC->drawId); 1547 1548 pDC->FeWork.type = STORETILES; 1549 pDC->FeWork.pfnWork = ProcessStoreTiles; 1550 pDC->FeWork.desc.storeTiles.attachmentMask = attachmentMask; 1551 pDC->FeWork.desc.storeTiles.postStoreTileState = postStoreTileState; 1552 pDC->FeWork.desc.storeTiles.rect = storeRect; 1553 pDC->FeWork.desc.storeTiles.rect &= g_MaxScissorRect; 1554 1555 //enqueue 1556 QueueDraw(pContext); 1557 1558 AR_API_EVENT(SwrStoreTilesEvent(pDC->drawId)); 1559 1560 AR_API_END(APIStoreTiles, 1); 1561 } 1562 1563 ////////////////////////////////////////////////////////////////////////// 1564 /// @brief SwrClearRenderTarget - Clear attached render targets / depth / stencil 1565 /// @param hContext - Handle passed back from SwrCreateContext 1566 /// @param attachmentMask - combination of 
SWR_ATTACHMENT_*_BIT attachments to clear 1567 /// @param renderTargetArrayIndex - the RT array index to clear 1568 /// @param clearColor - color use for clearing render targets 1569 /// @param z - depth value use for clearing depth buffer 1570 /// @param stencil - stencil value used for clearing stencil buffer 1571 /// @param clearRect - The pixel-coordinate rectangle to clear in all cleared buffers 1572 void SWR_API SwrClearRenderTarget( 1573 HANDLE hContext, 1574 uint32_t attachmentMask, 1575 uint32_t renderTargetArrayIndex, 1576 const float clearColor[4], 1577 float z, 1578 uint8_t stencil, 1579 const SWR_RECT& clearRect) 1580 { 1581 if (KNOB_TOSS_DRAW) 1582 { 1583 return; 1584 } 1585 1586 SWR_CONTEXT *pContext = GetContext(hContext); 1587 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1588 1589 AR_API_BEGIN(APIClearRenderTarget, pDC->drawId); 1590 1591 pDC->FeWork.type = CLEAR; 1592 pDC->FeWork.pfnWork = ProcessClear; 1593 pDC->FeWork.desc.clear.rect = clearRect; 1594 pDC->FeWork.desc.clear.rect &= g_MaxScissorRect; 1595 pDC->FeWork.desc.clear.attachmentMask = attachmentMask; 1596 pDC->FeWork.desc.clear.renderTargetArrayIndex = renderTargetArrayIndex; 1597 pDC->FeWork.desc.clear.clearDepth = z; 1598 pDC->FeWork.desc.clear.clearRTColor[0] = clearColor[0]; 1599 pDC->FeWork.desc.clear.clearRTColor[1] = clearColor[1]; 1600 pDC->FeWork.desc.clear.clearRTColor[2] = clearColor[2]; 1601 pDC->FeWork.desc.clear.clearRTColor[3] = clearColor[3]; 1602 pDC->FeWork.desc.clear.clearStencil = stencil; 1603 1604 // enqueue draw 1605 QueueDraw(pContext); 1606 1607 AR_API_END(APIClearRenderTarget, 1); 1608 } 1609 1610 ////////////////////////////////////////////////////////////////////////// 1611 /// @brief Returns a pointer to the private context state for the current 1612 /// draw operation. This is used for external componets such as the 1613 /// sampler. 1614 /// SWR is responsible for the allocation of the private context state. 
1615 /// @param hContext - Handle passed back from SwrCreateContext 1616 VOID* SwrGetPrivateContextState( 1617 HANDLE hContext) 1618 { 1619 SWR_CONTEXT* pContext = GetContext(hContext); 1620 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1621 DRAW_STATE* pState = pDC->pState; 1622 1623 if (pState->pPrivateState == nullptr) 1624 { 1625 pState->pPrivateState = pState->pArena->AllocAligned(pContext->privateStateSize, KNOB_SIMD_WIDTH*sizeof(float)); 1626 } 1627 1628 return pState->pPrivateState; 1629 } 1630 1631 ////////////////////////////////////////////////////////////////////////// 1632 /// @brief Clients can use this to allocate memory for draw/dispatch 1633 /// operations. The memory will automatically be freed once operation 1634 /// has completed. Client can use this to allocate binding tables, 1635 /// etc. needed for shader execution. 1636 /// @param hContext - Handle passed back from SwrCreateContext 1637 /// @param size - Size of allocation 1638 /// @param align - Alignment needed for allocation. 1639 VOID* SwrAllocDrawContextMemory( 1640 HANDLE hContext, 1641 uint32_t size, 1642 uint32_t align) 1643 { 1644 SWR_CONTEXT* pContext = GetContext(hContext); 1645 DRAW_CONTEXT* pDC = GetDrawContext(pContext); 1646 1647 return pDC->pState->pArena->AllocAligned(size, align); 1648 } 1649 1650 ////////////////////////////////////////////////////////////////////////// 1651 /// @brief Enables stats counting 1652 /// @param hContext - Handle passed back from SwrCreateContext 1653 /// @param enable - If true then counts are incremented. 
void SwrEnableStatsFE(
    HANDLE hContext,
    bool enable)
{
    SWR_CONTEXT *pContext = GetContext(hContext);
    DRAW_CONTEXT* pDC = GetDrawContext(pContext);

    // Stored in the API state of the current draw context (enableStatsFE).
    pDC->pState->state.enableStatsFE = enable;
}

//////////////////////////////////////////////////////////////////////////
/// @brief Enables stats counting (enableStatsBE flag)
/// @param hContext - Handle passed back from SwrCreateContext
/// @param enable - If true then counts are incremented.
void SwrEnableStatsBE(
    HANDLE hContext,
    bool enable)
{
    SWR_CONTEXT *pContext = GetContext(hContext);
    DRAW_CONTEXT* pDC = GetDrawContext(pContext);

    // Stored in the API state of the current draw context (enableStatsBE).
    pDC->pState->state.enableStatsBE = enable;
}

//////////////////////////////////////////////////////////////////////////
/// @brief Mark end of frame - used for performance profiling.
///        Emits the end-of-frame profiling hooks and increments the
///        context's frame counter.
/// @param hContext - Handle passed back from SwrCreateContext
void SWR_API SwrEndFrame(
    HANDLE hContext)
{
    SWR_CONTEXT *pContext = GetContext(hContext);
    DRAW_CONTEXT* pDC = GetDrawContext(pContext);
    (void)pDC; // var used
    // NOTE(review): the cast above presumably silences an unused-variable
    // warning when AR_API_EVENT compiles to nothing - confirm against the macro.

    RDTSC_ENDFRAME();
    AR_API_EVENT(FrameEndEvent(pContext->frameCount, pDC->drawId));

    pContext->frameCount++;
}

// Table-initialization routines defined elsewhere; called from SwrInit below.
void InitSimLoadTilesTable();
void InitSimStoreTilesTable();
void InitSimClearTilesTable();

void InitClearTilesTable();
void InitBackendFuncTables();

//////////////////////////////////////////////////////////////////////////
/// @brief Initialize swr backend and memory internal tables
///        by calling each of the table/function initializers in turn.
void SwrInit()
{
    InitSimLoadTilesTable();
    InitSimStoreTilesTable();
    InitSimClearTilesTable();

    InitClearTilesTable();
    InitBackendFuncTables();
    InitRasterizerFunctions();
}

//////////////////////////////////////////////////////////////////////////
/// @brief Fills the interface table with pointers to the API entry points,
///        one pfn<Name> member per Swr<Name> function.
/// @param out_funcs - Interface table to populate; every member assigned
///        below is overwritten.
void SwrGetInterface(SWR_INTERFACE &out_funcs)
{
    out_funcs.pfnSwrCreateContext = SwrCreateContext;
    out_funcs.pfnSwrDestroyContext = SwrDestroyContext;
    out_funcs.pfnSwrBindApiThread = SwrBindApiThread;
    out_funcs.pfnSwrSaveState = SwrSaveState;
    out_funcs.pfnSwrRestoreState = SwrRestoreState;
    out_funcs.pfnSwrSync = SwrSync;
    out_funcs.pfnSwrStallBE = SwrStallBE;
    out_funcs.pfnSwrWaitForIdle = SwrWaitForIdle;
    out_funcs.pfnSwrWaitForIdleFE = SwrWaitForIdleFE;
    out_funcs.pfnSwrSetVertexBuffers = SwrSetVertexBuffers;
    out_funcs.pfnSwrSetIndexBuffer = SwrSetIndexBuffer;
    out_funcs.pfnSwrSetFetchFunc = SwrSetFetchFunc;
    out_funcs.pfnSwrSetSoFunc = SwrSetSoFunc;
    out_funcs.pfnSwrSetSoState = SwrSetSoState;
    out_funcs.pfnSwrSetSoBuffers = SwrSetSoBuffers;
    out_funcs.pfnSwrSetVertexFunc = SwrSetVertexFunc;
    out_funcs.pfnSwrSetFrontendState = SwrSetFrontendState;
    out_funcs.pfnSwrSetGsState = SwrSetGsState;
    out_funcs.pfnSwrSetGsFunc = SwrSetGsFunc;
    out_funcs.pfnSwrSetCsFunc = SwrSetCsFunc;
    out_funcs.pfnSwrSetTsState = SwrSetTsState;
    out_funcs.pfnSwrSetHsFunc = SwrSetHsFunc;
    out_funcs.pfnSwrSetDsFunc = SwrSetDsFunc;
    out_funcs.pfnSwrSetDepthStencilState = SwrSetDepthStencilState;
    out_funcs.pfnSwrSetBackendState = SwrSetBackendState;
    out_funcs.pfnSwrSetDepthBoundsState = SwrSetDepthBoundsState;
    out_funcs.pfnSwrSetPixelShaderState = SwrSetPixelShaderState;
    out_funcs.pfnSwrSetBlendState = SwrSetBlendState;
    out_funcs.pfnSwrSetBlendFunc = SwrSetBlendFunc;
    out_funcs.pfnSwrDraw = SwrDraw;
    out_funcs.pfnSwrDrawInstanced = SwrDrawInstanced;
    out_funcs.pfnSwrDrawIndexed = SwrDrawIndexed;
    out_funcs.pfnSwrDrawIndexedInstanced = SwrDrawIndexedInstanced;
    out_funcs.pfnSwrInvalidateTiles = SwrInvalidateTiles;
    out_funcs.pfnSwrDiscardRect = SwrDiscardRect;
    out_funcs.pfnSwrDispatch = SwrDispatch;
    out_funcs.pfnSwrStoreTiles = SwrStoreTiles;
    out_funcs.pfnSwrClearRenderTarget = SwrClearRenderTarget;
    out_funcs.pfnSwrSetRastState = SwrSetRastState;
    out_funcs.pfnSwrSetViewports = SwrSetViewports;
    out_funcs.pfnSwrSetScissorRects = SwrSetScissorRects;
    out_funcs.pfnSwrGetPrivateContextState = SwrGetPrivateContextState;
    out_funcs.pfnSwrAllocDrawContextMemory = SwrAllocDrawContextMemory;
    out_funcs.pfnSwrEnableStatsFE = SwrEnableStatsFE;
    out_funcs.pfnSwrEnableStatsBE = SwrEnableStatsBE;
    out_funcs.pfnSwrEndFrame = SwrEndFrame;
    out_funcs.pfnSwrInit = SwrInit;
    out_funcs.pfnSwrLoadHotTile = SwrLoadHotTile;
    out_funcs.pfnSwrStoreHotTileToSurface = SwrStoreHotTileToSurface;
    out_funcs.pfnSwrStoreHotTileClear = SwrStoreHotTileClear;
}