Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file rasterizer.cpp
     24 *
     25 * @brief Implementation for the rasterizer.
     26 *
     27 ******************************************************************************/
     28 
     29 #include <vector>
     30 #include <algorithm>
     31 
     32 #include "rasterizer.h"
     33 #include "backends/gen_rasterizer.hpp"
     34 #include "rdtsc_core.h"
     35 #include "backend.h"
     36 #include "utils.h"
     37 #include "frontend.h"
     38 #include "tilemgr.h"
     39 #include "memory/tilingtraits.h"
     40 #include "rasterizer_impl.h"
     41 
     // Table of rasterizer work functions. GetRasterizerFunc() indexes it as
     // [numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges];
     // the three dimensions of size 2 are the bool selectors.
     // NOTE(review): presumably filled in by InitRasterizerFuncs() -- confirm in gen_rasterizer.
     42 PFN_WORK_FUNC gRasterizerFuncs[SWR_MULTISAMPLE_TYPE_COUNT][2][2][SWR_INPUT_COVERAGE_COUNT][STATE_VALID_TRI_EDGE_COUNT][2];
     43 
     44 void RasterizeLine(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pData)
     45 {
     46     SWR_CONTEXT *pContext = pDC->pContext;
     47     const TRIANGLE_WORK_DESC &workDesc = *((TRIANGLE_WORK_DESC*)pData);
     48 #if KNOB_ENABLE_TOSS_POINTS
     49     if (KNOB_TOSS_BIN_TRIS)
     50     {
     51         return;
     52     }
     53 #endif
     54 
     55     // bloat line to two tris and call the triangle rasterizer twice
     56     AR_BEGIN(BERasterizeLine, pDC->drawId);
     57 
     58     const API_STATE &state = GetApiState(pDC);
     59     const SWR_RASTSTATE &rastState = state.rastState;
     60 
     61     // macrotile dimensioning
     62     uint32_t macroX, macroY;
     63     MacroTileMgr::getTileIndices(macroTile, macroX, macroY);
     64     int32_t macroBoxLeft = macroX * KNOB_MACROTILE_X_DIM_FIXED;
     65     int32_t macroBoxRight = macroBoxLeft + KNOB_MACROTILE_X_DIM_FIXED - 1;
     66     int32_t macroBoxTop = macroY * KNOB_MACROTILE_Y_DIM_FIXED;
     67     int32_t macroBoxBottom = macroBoxTop + KNOB_MACROTILE_Y_DIM_FIXED - 1;
     68 
     69     const SWR_RECT &scissorInFixedPoint = state.scissorsInFixedPoint[workDesc.triFlags.viewportIndex];
     70 
     71     // create a copy of the triangle buffer to write our adjusted vertices to
     72     OSALIGNSIMD(float) newTriBuffer[4 * 4];
     73     TRIANGLE_WORK_DESC newWorkDesc = workDesc;
     74     newWorkDesc.pTriBuffer = &newTriBuffer[0];
     75 
     76     // create a copy of the attrib buffer to write our adjusted attribs to
     77     OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
     78     newWorkDesc.pAttribs = &newAttribBuffer[0];
     79 
     80     const __m128 vBloat0 = _mm_set_ps(0.5f, -0.5f, -0.5f, 0.5f);
     81     const __m128 vBloat1 = _mm_set_ps(0.5f, 0.5f, 0.5f, -0.5f);
     82 
     83     __m128 vX, vY, vZ, vRecipW;
     84 
     85     vX = _mm_load_ps(workDesc.pTriBuffer);
     86     vY = _mm_load_ps(workDesc.pTriBuffer + 4);
     87     vZ = _mm_load_ps(workDesc.pTriBuffer + 8);
     88     vRecipW = _mm_load_ps(workDesc.pTriBuffer + 12);
     89 
     90     // triangle 0
     91     // v0,v1 -> v0,v0,v1
     92     __m128 vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 1, 0, 0));
     93     __m128 vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 1, 0, 0));
     94     __m128 vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 1, 0, 0));
     95     __m128 vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 1, 0, 0));
     96 
     97     __m128 vLineWidth = _mm_set1_ps(pDC->pState->state.rastState.lineWidth);
     98     __m128 vAdjust = _mm_mul_ps(vLineWidth, vBloat0);
     99     if (workDesc.triFlags.yMajor)
    100     {
    101         vXa = _mm_add_ps(vAdjust, vXa);
    102     }
    103     else
    104     {
    105         vYa = _mm_add_ps(vAdjust, vYa);
    106     }
    107 
    108     // Store triangle description for rasterizer
    109     _mm_store_ps((float*)&newTriBuffer[0], vXa);
    110     _mm_store_ps((float*)&newTriBuffer[4], vYa);
    111     _mm_store_ps((float*)&newTriBuffer[8], vZa);
    112     _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
    113 
    114     // binner bins 3 edges for lines as v0, v1, v1
    115     // tri0 needs v0, v0, v1
    116     for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
    117     {
    118         __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
    119         __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
    120 
    121         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib0);
    122         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib0);
    123         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib1);
    124     }
    125 
    126     // Store user clip distances for triangle 0
    127     float newClipBuffer[3 * 8];
    128     uint32_t numClipDist = _mm_popcnt_u32(state.backendState.clipDistanceMask);
    129     if (numClipDist)
    130     {
    131         newWorkDesc.pUserClipBuffer = newClipBuffer;
    132 
    133         float* pOldBuffer = workDesc.pUserClipBuffer;
    134         float* pNewBuffer = newClipBuffer;
    135         for (uint32_t i = 0; i < numClipDist; ++i)
    136         {
    137             // read barycentric coeffs from binner
    138             float a = *(pOldBuffer++);
    139             float b = *(pOldBuffer++);
    140 
    141             // reconstruct original clip distance at vertices
    142             float c0 = a + b;
    143             float c1 = b;
    144 
    145             // construct triangle barycentrics
    146             *(pNewBuffer++) = c0 - c1;
    147             *(pNewBuffer++) = c0 - c1;
    148             *(pNewBuffer++) = c1;
    149         }
    150     }
    151 
    152     // setup triangle rasterizer function
    153     PFN_WORK_FUNC pfnTriRast;
    154     // conservative rast not supported for points/lines
    155     pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
    156         SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
    157 
    158     // make sure this macrotile intersects the triangle
    159     __m128i vXai = fpToFixedPoint(vXa);
    160     __m128i vYai = fpToFixedPoint(vYa);
    161     OSALIGNSIMD(SWR_RECT) bboxA;
    162     calcBoundingBoxInt(vXai, vYai, bboxA);
    163 
    164     if (!(bboxA.xmin > macroBoxRight ||
    165         bboxA.xmin > scissorInFixedPoint.xmax ||
    166         bboxA.xmax - 1 < macroBoxLeft ||
    167         bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
    168         bboxA.ymin > macroBoxBottom ||
    169         bboxA.ymin > scissorInFixedPoint.ymax ||
    170         bboxA.ymax - 1 < macroBoxTop ||
    171         bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
    172         // rasterize triangle
    173         pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
    174     }
    175 
    176     // triangle 1
    177     // v0,v1 -> v1,v1,v0
    178     vXa = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(1, 0, 1, 1));
    179     vYa = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(1, 0, 1, 1));
    180     vZa = _mm_shuffle_ps(vZ, vZ, _MM_SHUFFLE(1, 0, 1, 1));
    181     vRecipWa = _mm_shuffle_ps(vRecipW, vRecipW, _MM_SHUFFLE(1, 0, 1, 1));
    182 
    183     vAdjust = _mm_mul_ps(vLineWidth, vBloat1);
    184     if (workDesc.triFlags.yMajor)
    185     {
    186         vXa = _mm_add_ps(vAdjust, vXa);
    187     }
    188     else
    189     {
    190         vYa = _mm_add_ps(vAdjust, vYa);
    191     }
    192 
    193     // Store triangle description for rasterizer
    194     _mm_store_ps((float*)&newTriBuffer[0], vXa);
    195     _mm_store_ps((float*)&newTriBuffer[4], vYa);
    196     _mm_store_ps((float*)&newTriBuffer[8], vZa);
    197     _mm_store_ps((float*)&newTriBuffer[12], vRecipWa);
    198 
    199     // binner bins 3 edges for lines as v0, v1, v1
    200     // tri1 needs v1, v1, v0
    201     for (uint32_t a = 0; a < workDesc.numAttribs; ++a)
    202     {
    203         __m128 vAttrib0 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 0]);
    204         __m128 vAttrib1 = _mm_load_ps(&workDesc.pAttribs[a * 12 + 4]);
    205 
    206         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 0], vAttrib1);
    207         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 4], vAttrib1);
    208         _mm_store_ps((float*)&newAttribBuffer[a * 12 + 8], vAttrib0);
    209     }
    210 
    211     // store user clip distance for triangle 1
    212     if (numClipDist)
    213     {
    214         float* pOldBuffer = workDesc.pUserClipBuffer;
    215         float* pNewBuffer = newClipBuffer;
    216         for (uint32_t i = 0; i < numClipDist; ++i)
    217         {
    218             // read barycentric coeffs from binner
    219             float a = *(pOldBuffer++);
    220             float b = *(pOldBuffer++);
    221 
    222             // reconstruct original clip distance at vertices
    223             float c0 = a + b;
    224             float c1 = b;
    225 
    226             // construct triangle barycentrics
    227             *(pNewBuffer++) = c1 - c0;
    228             *(pNewBuffer++) = c1 - c0;
    229             *(pNewBuffer++) = c0;
    230         }
    231     }
    232 
    233     vXai = fpToFixedPoint(vXa);
    234     vYai = fpToFixedPoint(vYa);
    235     calcBoundingBoxInt(vXai, vYai, bboxA);
    236 
    237     if (!(bboxA.xmin > macroBoxRight ||
    238         bboxA.xmin > scissorInFixedPoint.xmax ||
    239         bboxA.xmax - 1 < macroBoxLeft ||
    240         bboxA.xmax - 1 < scissorInFixedPoint.xmin ||
    241         bboxA.ymin > macroBoxBottom ||
    242         bboxA.ymin > scissorInFixedPoint.ymax ||
    243         bboxA.ymax - 1 < macroBoxTop ||
    244         bboxA.ymax - 1 < scissorInFixedPoint.ymin)) {
    245         // rasterize triangle
    246         pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
    247     }
    248 
    249     AR_END(BERasterizeLine, 1);
    250 }
    251 
    252 void RasterizeSimplePoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
    253 {
    254     SWR_CONTEXT *pContext = pDC->pContext;
    255 
    256 #if KNOB_ENABLE_TOSS_POINTS
    257     if (KNOB_TOSS_BIN_TRIS)
    258     {
    259         return;
    260     }
    261 #endif
    262 
    263     const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
    264     const BACKEND_FUNCS& backendFuncs = pDC->pState->backendFuncs;
    265 
    266     // map x,y relative offsets from start of raster tile to bit position in
    267     // coverage mask for the point
    268     static const uint32_t coverageMap[8][8] = {
    269         { 0, 1, 4, 5, 8, 9, 12, 13 },
    270         { 2, 3, 6, 7, 10, 11, 14, 15 },
    271         { 16, 17, 20, 21, 24, 25, 28, 29 },
    272         { 18, 19, 22, 23, 26, 27, 30, 31 },
    273         { 32, 33, 36, 37, 40, 41, 44, 45 },
    274         { 34, 35, 38, 39, 42, 43, 46, 47 },
    275         { 48, 49, 52, 53, 56, 57, 60, 61 },
    276         { 50, 51, 54, 55, 58, 59, 62, 63 }
    277     };
    278 
    279     OSALIGNSIMD(SWR_TRIANGLE_DESC) triDesc;
    280 
    281     // pull point information from triangle buffer
    282     // @todo use structs for readability
    283     uint32_t tileAlignedX = *(uint32_t*)workDesc.pTriBuffer;
    284     uint32_t tileAlignedY = *(uint32_t*)(workDesc.pTriBuffer + 1);
    285     float z = *(workDesc.pTriBuffer + 2);
    286 
    287     // construct triangle descriptor for point
    288     // no interpolation, set up i,j for constant interpolation of z and attribs
    289     // @todo implement an optimized backend that doesn't require triangle information
    290 
    291     // compute coverage mask from x,y packed into the coverageMask flag
    292     // mask indices by the maximum valid index for x/y of coveragemap.
    293     uint32_t tX = workDesc.triFlags.coverageMask & 0x7;
    294     uint32_t tY = (workDesc.triFlags.coverageMask >> 4) & 0x7;
    295     // todo: multisample points?
    296     triDesc.coverageMask[0] = 1ULL << coverageMap[tY][tX];
    297 
    298     // no persp divide needed for points
    299     triDesc.pAttribs = triDesc.pPerspAttribs = workDesc.pAttribs;
    300     triDesc.triFlags = workDesc.triFlags;
    301     triDesc.recipDet = 1.0f;
    302     triDesc.OneOverW[0] = triDesc.OneOverW[1] = triDesc.OneOverW[2] = 1.0f;
    303     triDesc.I[0] = triDesc.I[1] = triDesc.I[2] = 0.0f;
    304     triDesc.J[0] = triDesc.J[1] = triDesc.J[2] = 0.0f;
    305     triDesc.Z[0] = triDesc.Z[1] = triDesc.Z[2] = z;
    306 
    307     RenderOutputBuffers renderBuffers;
    308     GetRenderHotTiles(pDC, macroTile, tileAlignedX >> KNOB_TILE_X_DIM_SHIFT , tileAlignedY >> KNOB_TILE_Y_DIM_SHIFT,
    309         renderBuffers, triDesc.triFlags.renderTargetArrayIndex);
    310 
    311     AR_BEGIN(BEPixelBackend, pDC->drawId);
    312     backendFuncs.pfnBackend(pDC, workerId, tileAlignedX, tileAlignedY, triDesc, renderBuffers);
    313     AR_END(BEPixelBackend, 0);
    314 }
    315 
    316 void RasterizeTriPoint(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void* pData)
    317 {
    318     const TRIANGLE_WORK_DESC& workDesc = *(const TRIANGLE_WORK_DESC*)pData;
    319     const SWR_RASTSTATE& rastState = pDC->pState->state.rastState;
    320     const SWR_BACKEND_STATE& backendState = pDC->pState->state.backendState;
    321 
    322     bool isPointSpriteTexCoordEnabled = backendState.pointSpriteTexCoordMask != 0;
    323 
    324     // load point vertex
    325     float x = *workDesc.pTriBuffer;
    326     float y = *(workDesc.pTriBuffer + 1);
    327     float z = *(workDesc.pTriBuffer + 2);
    328 
    329     // create a copy of the triangle buffer to write our adjusted vertices to
    330     OSALIGNSIMD(float) newTriBuffer[4 * 4];
    331     TRIANGLE_WORK_DESC newWorkDesc = workDesc;
    332     newWorkDesc.pTriBuffer = &newTriBuffer[0];
    333 
    334     // create a copy of the attrib buffer to write our adjusted attribs to
    335     OSALIGNSIMD(float) newAttribBuffer[4 * 3 * SWR_VTX_NUM_SLOTS];
    336     newWorkDesc.pAttribs = &newAttribBuffer[0];
    337 
    338     newWorkDesc.pUserClipBuffer = workDesc.pUserClipBuffer;
    339     newWorkDesc.numAttribs = workDesc.numAttribs;
    340     newWorkDesc.triFlags = workDesc.triFlags;
    341 
    342     // construct two tris by bloating point by point size
    343     float halfPointSize = workDesc.triFlags.pointSize * 0.5f;
    344     float lowerX = x - halfPointSize;
    345     float upperX = x + halfPointSize;
    346     float lowerY = y - halfPointSize;
    347     float upperY = y + halfPointSize;
    348 
    349     // tri 0
    350     float *pBuf = &newTriBuffer[0];
    351     *pBuf++ = lowerX;
    352     *pBuf++ = lowerX;
    353     *pBuf++ = upperX;
    354     pBuf++;
    355     *pBuf++ = lowerY;
    356     *pBuf++ = upperY;
    357     *pBuf++ = upperY;
    358     pBuf++;
    359     _mm_store_ps(pBuf, _mm_set1_ps(z));
    360     _mm_store_ps(pBuf += 4, _mm_set1_ps(1.0f));
    361 
    362     // setup triangle rasterizer function
    363     PFN_WORK_FUNC pfnTriRast;
    364     // conservative rast not supported for points/lines
    365     pfnTriRast = GetRasterizerFunc(rastState.sampleCount, rastState.bIsCenterPattern, false,
    366         SWR_INPUT_COVERAGE_NONE, EdgeValToEdgeState(ALL_EDGES_VALID), (pDC->pState->state.scissorsTileAligned == false));
    367 
    368     // overwrite texcoords for point sprites
    369     if (isPointSpriteTexCoordEnabled)
    370     {
    371         // copy original attribs
    372         memcpy(&newAttribBuffer[0], workDesc.pAttribs, 4 * 3 * workDesc.numAttribs * sizeof(float));
    373         newWorkDesc.pAttribs = &newAttribBuffer[0];
    374 
    375         // overwrite texcoord for point sprites
    376         uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
    377         DWORD texCoordAttrib = 0;
    378 
    379         while (_BitScanForward(&texCoordAttrib, texCoordMask))
    380         {
    381             texCoordMask &= ~(1 << texCoordAttrib);
    382             __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
    383             if (rastState.pointSpriteTopOrigin)
    384             {
    385                 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
    386                 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 0);
    387                 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
    388             }
    389             else
    390             {
    391                 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
    392                 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 0);
    393                 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
    394             }
    395         }
    396     }
    397     else
    398     {
    399         // no texcoord overwrite, can reuse the attrib buffer from frontend
    400         newWorkDesc.pAttribs = workDesc.pAttribs;
    401     }
    402 
    403     pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
    404 
    405     // tri 1
    406     pBuf = &newTriBuffer[0];
    407     *pBuf++ = lowerX;
    408     *pBuf++ = upperX;
    409     *pBuf++ = upperX;
    410     pBuf++;
    411     *pBuf++ = lowerY;
    412     *pBuf++ = upperY;
    413     *pBuf++ = lowerY;
    414     // z, w unchanged
    415 
    416     if (isPointSpriteTexCoordEnabled)
    417     {
    418         uint32_t texCoordMask = backendState.pointSpriteTexCoordMask;
    419         DWORD texCoordAttrib = 0;
    420 
    421         while (_BitScanForward(&texCoordAttrib, texCoordMask))
    422         {
    423             texCoordMask &= ~(1 << texCoordAttrib);
    424             __m128* pTexAttrib = (__m128*)&newAttribBuffer[0] + 3 * texCoordAttrib;
    425             if (rastState.pointSpriteTopOrigin)
    426             {
    427                 pTexAttrib[0] = _mm_set_ps(1, 0, 0, 0);
    428                 pTexAttrib[1] = _mm_set_ps(1, 0, 1, 1);
    429                 pTexAttrib[2] = _mm_set_ps(1, 0, 0, 1);
    430 
    431             }
    432             else
    433             {
    434                 pTexAttrib[0] = _mm_set_ps(1, 0, 1, 0);
    435                 pTexAttrib[1] = _mm_set_ps(1, 0, 0, 1);
    436                 pTexAttrib[2] = _mm_set_ps(1, 0, 1, 1);
    437             }
    438         }
    439     }
    440 
    441     pfnTriRast(pDC, workerId, macroTile, (void*)&newWorkDesc);
    442 }
    443 
    444 void InitRasterizerFunctions()
    445 {
    446     InitRasterizerFuncs();
    447 }
    448 
    449 // Selector for correct templated RasterizeTriangle function
    450 PFN_WORK_FUNC GetRasterizerFunc(
    451     SWR_MULTISAMPLE_COUNT numSamples,
    452     bool IsCenter,
    453     bool IsConservative,
    454     SWR_INPUT_COVERAGE InputCoverage,
    455     uint32_t EdgeEnable,
    456     bool RasterizeScissorEdges
    457 )
    458 {
    459     SWR_ASSERT(numSamples >= 0 && numSamples < SWR_MULTISAMPLE_TYPE_COUNT);
    460     SWR_ASSERT(InputCoverage >= 0 && InputCoverage < SWR_INPUT_COVERAGE_COUNT);
    461     SWR_ASSERT(EdgeEnable < STATE_VALID_TRI_EDGE_COUNT);
    462 
    463     PFN_WORK_FUNC func = gRasterizerFuncs[numSamples][IsCenter][IsConservative][InputCoverage][EdgeEnable][RasterizeScissorEdges];
    464     SWR_ASSERT(func);
    465 
    466     return func;
    467 }
    468