1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file rasterizer.cpp 24 * 25 * @brief Implementation for the rasterizer. 26 * 27 ******************************************************************************/ 28 29 #include <vector> 30 #include <algorithm> 31 32 #include "rasterizer.h" 33 #include "backends/gen_rasterizer.hpp" 34 #include "rdtsc_core.h" 35 #include "backend.h" 36 #include "utils.h" 37 #include "frontend.h" 38 #include "tilemgr.h" 39 #include "memory/tilingtraits.h" 40 #include "rasterizer_impl.h" 41 42 PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2]; 43 44 void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData) 45 { 46 SWR_CONTEXT *pContext = pDC->pContext; 47 const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData); 48 #if KNOB_ENABLE_TOSS_POINTS 49 if (KNOB_TOSS_BIN_TRIS) 50 { 51 return; 52 } 53 #endif 54 55 // bloat line to two tris and call the triangle rasterizer twice 56 AR_BEGIN(BERasterizeLine, pDC->drawId); 57 58 const API_STATE &state = GetApiState(pDC); 59 const SWR_RASTSTATE &rastState = state.rastState; 60 61 // macrotile dimensioning 62 uint32_t macroX, macroY; 63 MacroTileMgr::getTileIndices(macroTile, macroX, macroY); 64 int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED; 65 int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1; 66 int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED; 67 int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1; 68 69 const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex]; 70 71 // create a copy of the triangle buffer to write our adjusted vertices to 72 OSALIGNSIMD(float) newTriBuffer[4 * 4]; 73 TRIANGLE_WORK_DESC newWorkDesc = workDesc; 74 newWorkDesc.pTriBuffer = &newTriBuffer[0]; 75 76 // create a copy of the attrib buffer to write our adjusted attribs to 77 OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS]; 78 newWorkDesc.pAttribs = &newAttribBuffer[0]; 79 80 const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f); 81 const __m128 vBloat1 = _mm_set_ps(0.5f, 0.5f, 0.5f, -0.5f); 82 83 __m128 vX, vY, vZ, vRecipW; 84 85 vX = _mm_load_ps(workDesc.pTriBuffer); 86 vY = _mm_load_ps(workDesc.pTriBuffer + 4); 87 vZ = _mm_load_ps(workDesc.pTriBuffer + 8); 88 vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12); 89 90 // triangle 0 91 // v0,v1 -> v0,v0,v1 92 __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0)); 93 __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0)); 94 __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0)); 95 __m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0)); 96 97 __m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth); 98 __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0); 99 if (workDesc.triFlags.yMajor) 100 { 101 vXa = _mm_add_ps(vAdjust, vXa); 102 } 103 else 104 { 105 vYa = _mm_add_ps(vAdjust, vYa); 106 } 107 108 // Store triangle description for rasterizer 109 _mm_store_ps((float*)&newTriBuffer[0], vXa); 110 _mm_store_ps((float*)&newTriBuffer[4], vYa); 111 _mm_store_ps((float*)&newTriBuffer[8], vZa); 112 _mm_store_ps((float*)&newTriBuffer[12], vRecipWa); 113 114 // binner bins 3 edges for lines as v0, v1, v1 115 // tri0 needs v0, v0, v1 116 for (uint32_t a = 0; a < workDesc.numAttribs; ++a) 117 { 118 __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]); 119 __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]); 120 121 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib0); 122 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib0); 123 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib1); 124 } 125 126 // Store user clip distances for triangle 0 127 float newClipBuffer[3 * 8]; 128 uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask); 129 if (numClipDist) 130 { 131 newWorkDesc.pUserClipBuffer = newClipBuffer; 132 133 float* pOldBuffer = workDesc.pUserClipBuffer; 134 float* pNewBuffer = newClipBuffer; 135 for (uint32_t i = 0; i < numClipDist; ++i) 136 { 137 // read barycentric coeffs from binner 138 float a = *(pOldBuffer++); 139 float b = *(pOldBuffer++); 140 141 // reconstruct original clip distance at vertices 142 float c0 = a + b; 143 float c1 = b; 144 145 // construct triangle barycentrics 146 *(pNewBuffer++) = c0 - c1; 147 *(pNewBuffer++) = c0 - c1; 148 *(pNewBuffer++) = c1; 149 } 150 } 151 152 // setup triangle rasterizer function 153 PFN_WORK_FUNC pfnTriRast; 154 // conservative rast not supported for points/lines 155 pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false, 156 SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false)); 157 158 // make sure this macrotile intersects the triangle 159 __m128i vXai = fpToFixedPoint(vXa); 160 __m128i vYai = fpToFixedPoint(vYa); 161 OSALIGNSIMD(SWR_RECT) bboxA; 162 calcBoundingBoxInt(vXai, vYai, bboxA); 163 164 if (!(bboxA.xmin > macroBoxRight || 165 bboxA.xmin > scissorInFixedPoint.xmax || 166 bboxA.xmax - 1 < macroBoxLeft || 167 bboxA.xmax - 1 < scissorInFixedPoint.xmin || 168 bboxA.ymin > macroBoxBottom || 169 bboxA.ymin > scissorInFixedPoint.ymax || 170 bboxA.ymax - 1 < macroBoxTop || 171 bboxA.ymax - 1 < scissorInFixedPoint.ymin)) { 172 // rasterize triangle 173 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); 174 } 175 176 // triangle 1 177 // v0,v1 -> v1,v1,v0 178 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1)); 179 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1)); 180 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1)); 181 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1)); 182 183 vAdjust = _mm_mul_ps(vLineWidth, vBloat1); 184 if (workDesc.triFlags.yMajor) 185 { 186 vXa = _mm_add_ps(vAdjust, vXa); 187 } 188 else 189 { 190 vYa = _mm_add_ps(vAdjust, vYa); 191 } 192 193 // Store triangle description for rasterizer 194 _mm_store_ps((float*)&newTriBuffer[0], vXa); 195 _mm_store_ps((float*)&newTriBuffer[4], vYa); 196 _mm_store_ps((float*)&newTriBuffer[8], vZa); 197 _mm_store_ps((float*)&newTriBuffer[12], vRecipWa); 198 199 // binner bins 3 edges for lines as v0, v1, v1 200 // tri1 needs v1, v1, v0 201 for (uint32_t a = 0; a < workDesc.numAttribs; ++a) 202 { 203 __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]); 204 __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]); 205 206 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib1); 207 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib1); 208 _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib0); 209 } 210 211 // store user clip distance for triangle 1 212 if (numClipDist) 213 { 214 float* pOldBuffer = workDesc.pUserClipBuffer; 215 float* pNewBuffer = newClipBuffer; 216 for (uint32_t i = 0; i < numClipDist; ++i) 217 { 218 // read barycentric coeffs from binner 219 float a = *(pOldBuffer++); 220 float b = *(pOldBuffer++); 221 222 // reconstruct original clip distance at vertices 223 float c0 = a + b; 224 float c1 = b; 225 226 // construct triangle barycentrics 227 *(pNewBuffer++) = c1 - c0; 228 *(pNewBuffer++) = c1 - c0; 229 *(pNewBuffer++) = c0; 230 } 231 } 232 233 vXai = fpToFixedPoint(vXa); 234 vYai = fpToFixedPoint(vYa); 235 calcBoundingBoxInt(vXai, vYai, bboxA); 236 237 if (!(bboxA.xmin > macroBoxRight || 238 bboxA.xmin > scissorInFixedPoint.xmax || 239 bboxA.xmax - 1 < macroBoxLeft || 240 bboxA.xmax - 1 < scissorInFixedPoint.xmin || 241 bboxA.ymin > macroBoxBottom || 242 bboxA.ymin > scissorInFixedPoint.ymax || 243 bboxA.ymax - 1 < macroBoxTop || 244 bboxA.ymax - 1 < scissorInFixedPoint.ymin)) { 245 // rasterize triangle 246 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); 247 } 248 249 AR_END(BERasterizeLine, 1); 250 } 251 252 void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) 253 { 254 SWR_CONTEXT *pContext = pDC->pContext; 255 256 #if KNOB_ENABLE_TOSS_POINTS 257 if (KNOB_TOSS_BIN_TRIS) 258 { 259 return; 260 } 261 #endif 262 263 const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData; 264 const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs; 265 266 // map x,y relative offsets from start of raster tile to bit position in 267 // coverage mask for the point 268 static const uint32_t coverageMap[8][8] = { 269 { 0, 1, 4, 5, 8, 9, 12, 13 }, 270 { 2, 3, 6, 7, 10, 11, 14, 15 }, 271 { 16, 17, 20, 21, 24, 25, 28, 29 }, 272 { 18, 19, 22, 23, 26, 27, 30, 31 }, 273 { 32, 33, 36, 37, 40, 41, 44, 45 }, 274 { 34, 35, 38, 39, 42, 43, 46, 47 }, 275 { 48, 49, 52, 53, 56, 57, 60, 61 }, 276 { 50, 51, 54, 55, 58, 59, 62, 63 } 277 }; 278 279 OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc; 280 281 // pull point information from triangle buffer 282 // @todo use structs for readability 283 uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer; 284 uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1); 285 float z = *(workDesc.pTriBuffer + 2); 286 287 // construct triangle descriptor for point 288 // no interpolation, set up i,j for constant interpolation of z and attribs 289 // @todo implement an optimized backend that doesn't require triangle information 290 291 // compute coverage mask from x,y packed into the coverageMask flag 292 // mask indices by the maximum valid index for x/y of coveragemap. 293 uint32_t tX = workDesc.triFlags.coverageMask & 0x7; 294 uint32_t tY = (workDesc.triFlags.coverageMask >> 4) & 0x7; 295 // todo: multisample points? 296 triDesc.coverageMask[0] = 1ULL << coverageMap[tY][tX]; 297 298 // no persp divide needed for points 299 triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs; 300 triDesc.triFlags = workDesc.triFlags; 301 triDesc.recipDet = 1.0f; 302 triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f; 303 triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f; 304 triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f; 305 triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z; 306 307 RenderOutputBuffers renderBuffers; 308 GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT, 309 renderBuffers, triDesc.triFlags.renderTargetArrayIndex); 310 311 AR_BEGIN(BEPixelBackend, pDC->drawId); 312 backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers); 313 AR_END(BEPixelBackend, 0); 314 } 315 316 void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData) 317 { 318 const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData; 319 const SWR_RASTSTATE& rastState = pDC->pState->state.rastState; 320 const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState; 321 322 bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0; 323 324 // load point vertex 325 float x = *workDesc.pTriBuffer; 326 float y = *(workDesc.pTriBuffer + 1); 327 float z = *(workDesc.pTriBuffer + 2); 328 329 // create a copy of the triangle buffer to write our adjusted vertices to 330 OSALIGNSIMD(float) newTriBuffer[4 * 4]; 331 TRIANGLE_WORK_DESC newWorkDesc = workDesc; 332 newWorkDesc.pTriBuffer = &newTriBuffer[0]; 333 334 // create a copy of the attrib buffer to write our adjusted attribs to 335 OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS]; 336 newWorkDesc.pAttribs = &newAttribBuffer[0]; 337 338 newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer; 339 newWorkDesc.numAttribs = workDesc.numAttribs; 340 newWorkDesc.triFlags = workDesc.triFlags; 341 342 // construct two tris by bloating point by point size 343 float halfPointSize = workDesc.triFlags.pointSize * 0.5f; 344 float lowerX = x - halfPointSize; 345 float upperX = x + halfPointSize; 346 float lowerY = y - halfPointSize; 347 float upperY = y + halfPointSize; 348 349 // tri 0 350 float *pBuf = &newTriBuffer[0]; 351 *pBuf++ = lowerX; 352 *pBuf++ = lowerX; 353 *pBuf++ = upperX; 354 pBuf++; 355 *pBuf++ = lowerY; 356 *pBuf++ = upperY; 357 *pBuf++ = upperY; 358 pBuf++; 359 _mm_store_ps(pBuf, _mm_set1_ps(z)); 360 _mm_store_ps(pBuf += 4, _mm_set1_ps(1.0f)); 361 362 // setup triangle rasterizer function 363 PFN_WORK_FUNC pfnTriRast; 364 // conservative rast not supported for points/lines 365 pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false, 366 SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false)); 367 368 // overwrite texcoords for point sprites 369 if (isPointSpriteTexCoordEnabled) 370 { 371 // copy original attribs 372 memcpy(&newAttribBuffer[0], workDesc.pAttribs, 4 * 3 * workDesc.numAttribs * sizeof(float)); 373 newWorkDesc.pAttribs = &newAttribBuffer[0]; 374 375 // overwrite texcoord for point sprites 376 uint32_t texCoordMask = backendState.pointSpriteTexCoordMask; 377 DWORD texCoordAttrib = 0; 378 379 while (_BitScanForward(&texCoordAttrib, texCoordMask)) 380 { 381 texCoordMask &= ~(1 << texCoordAttrib); 382 __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib; 383 if (rastState.pointSpriteTopOrigin) 384 { 385 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0); 386 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 0); 387 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1); 388 } 389 else 390 { 391 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0); 392 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 0); 393 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1); 394 } 395 } 396 } 397 else 398 { 399 // no texcoord overwrite, can reuse the attrib buffer from frontend 400 newWorkDesc.pAttribs = workDesc.pAttribs; 401 } 402 403 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); 404 405 // tri 1 406 pBuf = &newTriBuffer[0]; 407 *pBuf++ = lowerX; 408 *pBuf++ = upperX; 409 *pBuf++ = upperX; 410 pBuf++; 411 *pBuf++ = lowerY; 412 *pBuf++ = upperY; 413 *pBuf++ = lowerY; 414 // z, w unchanged 415 416 if (isPointSpriteTexCoordEnabled) 417 { 418 uint32_t texCoordMask = backendState.pointSpriteTexCoordMask; 419 DWORD texCoordAttrib = 0; 420 421 while (_BitScanForward(&texCoordAttrib, texCoordMask)) 422 { 423 texCoordMask &= ~(1 << texCoordAttrib); 424 __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib; 425 if (rastState.pointSpriteTopOrigin) 426 { 427 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0); 428 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1); 429 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1); 430 431 } 432 else 433 { 434 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0); 435 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 1); 436 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1); 437 } 438 } 439 } 440 441 pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc); 442 } 443 444 void InitRasterizerFunctions() 445 { 446 InitRasterizerFuncs(); 447 } 448 449 // Selector for correct templated RasterizeTriangle function 450 PFN_WORK_FUNC GetRasterizerFunc( 451 SWR_MULTISAMPLE_COUNT numSamples, 452 bool IsCenter, 453 bool IsConservative, 454 SWR_INPUT_COVERAGE InputCoverage, 455 uint32_t EdgeEnable, 456 bool RasterizeScissorEdges 457 ) 458 { 459 SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT); 460 SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT); 461 SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT); 462 463 PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges]; 464 SWR_ASSERT(func); 465 466 return func; 467 } 468