Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file frontend.h
     24 *
     25 * @brief Definitions for Frontend which handles vertex processing,
     26 *        primitive assembly, clipping, binning, etc.
     27 *
     28 ******************************************************************************/
     29 #pragma once
     30 #include "context.h"
     31 #include <type_traits>
     32 
     33 // Calculates the A and B coefficients for the 3 edges of the triangle
     34 //
     35 // maths for edge equations:
     36 //   standard form of a line in 2d
     37 //   Ax + By + C = 0
     38 //   A = y0 - y1
     39 //   B = x1 - x0
     40 //   C = x0y1 - x1y0
     41 INLINE
     42 void triangleSetupAB(const __m128 vX, const __m128 vY, __m128 & vA, __m128 & vB)
     43 {
     44     // vYsub = y1 y2 y0 dc
     45     __m128 vYsub = _mm_shuffle_ps(vY, vY, _MM_SHUFFLE(3, 0, 2, 1));
     46     // vY =    y0 y1 y2 dc
     47     vA = _mm_sub_ps(vY, vYsub);
     48 
     49     // Result:
     50     // A[0] = y0 - y1
     51     // A[1] = y1 - y2
     52     // A[2] = y2 - y0
     53 
     54     // vXsub = x1 x2 x0 dc
     55     __m128 vXsub = _mm_shuffle_ps(vX, vX, _MM_SHUFFLE(3, 0, 2, 1));
     56     // vX =    x0 x1 x2 dc
     57     vB = _mm_sub_ps(vXsub, vX);
     58 
     59     // Result:
     60     // B[0] = x1 - x0
     61     // B[1] = x2 - x1
     62     // B[2] = x0 - x2
     63 }
     64 
     65 INLINE
     66 void triangleSetupABVertical(const simdscalar vX[3], const simdscalar vY[3], simdscalar (&vA)[3], simdscalar (&vB)[3])
     67 {
     68     // generate edge equations
     69     // A = y0 - y1
     70     // B = x1 - x0
     71     vA[0] = _simd_sub_ps(vY[0], vY[1]);
     72     vA[1] = _simd_sub_ps(vY[1], vY[2]);
     73     vA[2] = _simd_sub_ps(vY[2], vY[0]);
     74 
     75     vB[0] = _simd_sub_ps(vX[1], vX[0]);
     76     vB[1] = _simd_sub_ps(vX[2], vX[1]);
     77     vB[2] = _simd_sub_ps(vX[0], vX[2]);
     78 }
     79 
     80 INLINE
     81 void triangleSetupABInt(const __m128i vX, const __m128i vY, __m128i & vA, __m128i & vB)
     82 {
     83     // generate edge equations
     84     // A = y0 - y1
     85     // B = x1 - x0
     86     // C = x0y1 - x1y0
     87     __m128i vYsub = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 2, 1));
     88     vA = _mm_sub_epi32(vY, vYsub);
     89 
     90     __m128i vXsub = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 2, 1));
     91     vB = _mm_sub_epi32(vXsub, vX);
     92 }
     93 
     94 INLINE
     95 void triangleSetupABIntVertical(const simdscalari vX[3], const simdscalari vY[3], simdscalari (&vA)[3], simdscalari (&vB)[3])
     96 {
     97     // A = y0 - y1
     98     // B = x1 - x0
     99     vA[0] = _simd_sub_epi32(vY[0], vY[1]);
    100     vA[1] = _simd_sub_epi32(vY[1], vY[2]);
    101     vA[2] = _simd_sub_epi32(vY[2], vY[0]);
    102 
    103     vB[0] = _simd_sub_epi32(vX[1], vX[0]);
    104     vB[1] = _simd_sub_epi32(vX[2], vX[1]);
    105     vB[2] = _simd_sub_epi32(vX[0], vX[2]);
    106 }
    107 // Calculate the determinant of the triangle
    108 // 2 vectors between the 3 points: P, Q
    109 // Px = x0-x2, Py = y0-y2
    110 // Qx = x1-x2, Qy = y1-y2
    111 //       |Px Qx|
    112 // det = |     | = PxQy - PyQx
    113 //       |Py Qy|
    114 // simplifies to : (x0-x2)*(y1-y2) - (y0-y2)*(x1-x2)
    115 //               try to reuse our A & B coef's already calculated. factor out a -1 from Py and Qx
    116 //               : B[2]*A[1] - (-(y2-y0))*(-(x2-x1))
    117 //               : B[2]*A[1] - (-1)(-1)(y2-y0)*(x2-x1)
    118 //               : B[2]*A[1] - A[2]*B[1]
    119 INLINE
    120 float calcDeterminantInt(const __m128i vA, const __m128i vB)
    121 {
    122     // vAShuf = [A1, A0, A2, A0]
    123     __m128i vAShuf = _mm_shuffle_epi32(vA, _MM_SHUFFLE(0, 2, 0, 1));
    124     // vBShuf = [B2, B0, B1, B0]
    125     __m128i vBShuf = _mm_shuffle_epi32(vB, _MM_SHUFFLE(0, 1, 0, 2));
    126     // vMul = [A1*B2, B1*A2]
    127     __m128i vMul   = _mm_mul_epi32(vAShuf, vBShuf);
    128 
    129     // shuffle upper to lower
    130     // vMul2 = [B1*A2, B1*A2]
    131     __m128i vMul2 = _mm_shuffle_epi32(vMul, _MM_SHUFFLE(3, 2, 3, 2));
    132     //vMul = [A1*B2 - B1*A2]
    133     vMul = _mm_sub_epi64(vMul, vMul2);
    134 
    135     int64_t result;
    136     _mm_store_sd((double*)&result, _mm_castsi128_pd(vMul));
    137 
    138     double dResult = (double)result;
    139     dResult = dResult * (1.0 / FIXED_POINT16_SCALE);
    140 
    141     return (float)dResult;
    142 }
    143 
    144 INLINE
    145 void calcDeterminantIntVertical(const simdscalari vA[3], const simdscalari vB[3], simdscalari *pvDet)
    146 {
    147     // refer to calcDeterminantInt comment for calculation explanation
    148     // A1*B2
    149     simdscalari vA1Lo = _simd_unpacklo_epi32(vA[1], vA[1]);     // 0 0 1 1 4 4 5 5
    150     simdscalari vA1Hi = _simd_unpackhi_epi32(vA[1], vA[1]);     // 2 2 3 3 6 6 7 7
    151 
    152     simdscalari vB2Lo = _simd_unpacklo_epi32(vB[2], vB[2]);
    153     simdscalari vB2Hi = _simd_unpackhi_epi32(vB[2], vB[2]);
    154 
    155     simdscalari vA1B2Lo = _simd_mul_epi32(vA1Lo, vB2Lo);        // 0 1 4 5
    156     simdscalari vA1B2Hi = _simd_mul_epi32(vA1Hi, vB2Hi);        // 2 3 6 7
    157 
    158     // B1*A2
    159     simdscalari vA2Lo = _simd_unpacklo_epi32(vA[2], vA[2]);
    160     simdscalari vA2Hi = _simd_unpackhi_epi32(vA[2], vA[2]);
    161 
    162     simdscalari vB1Lo = _simd_unpacklo_epi32(vB[1], vB[1]);
    163     simdscalari vB1Hi = _simd_unpackhi_epi32(vB[1], vB[1]);
    164 
    165     simdscalari vA2B1Lo = _simd_mul_epi32(vA2Lo, vB1Lo);
    166     simdscalari vA2B1Hi = _simd_mul_epi32(vA2Hi, vB1Hi);
    167 
    168     // A1*B2 - A2*B1
    169     simdscalari detLo = _simd_sub_epi64(vA1B2Lo, vA2B1Lo);
    170     simdscalari detHi = _simd_sub_epi64(vA1B2Hi, vA2B1Hi);
    171 
    172     // shuffle 0 1 4 5 -> 0 1 2 3
    173     simdscalari vResultLo = _mm256_permute2f128_si256(detLo, detHi, 0x20);
    174     simdscalari vResultHi = _mm256_permute2f128_si256(detLo, detHi, 0x31);
    175 
    176     pvDet[0] = vResultLo;
    177     pvDet[1] = vResultHi;
    178 }
    179 
    180 INLINE
    181 void triangleSetupC(const __m128 vX, const __m128 vY, const __m128 vA, const __m128 &vB, __m128 &vC)
    182 {
    183     // C = -Ax - By
    184     vC  = _mm_mul_ps(vA, vX);
    185     __m128 vCy = _mm_mul_ps(vB, vY);
    186     vC  = _mm_mul_ps(vC, _mm_set1_ps(-1.0f));
    187     vC  = _mm_sub_ps(vC, vCy);
    188 }
    189 
    190 INLINE
    191 void viewportTransform(__m128 &vX, __m128 &vY, __m128 &vZ, const SWR_VIEWPORT_MATRIX &vpMatrix)
    192 {
    193     vX = _mm_mul_ps(vX, _mm_set1_ps(vpMatrix.m00));
    194     vX = _mm_add_ps(vX, _mm_set1_ps(vpMatrix.m30));
    195 
    196     vY = _mm_mul_ps(vY, _mm_set1_ps(vpMatrix.m11));
    197     vY = _mm_add_ps(vY, _mm_set1_ps(vpMatrix.m31));
    198 
    199     vZ = _mm_mul_ps(vZ, _mm_set1_ps(vpMatrix.m22));
    200     vZ = _mm_add_ps(vZ, _mm_set1_ps(vpMatrix.m32));
    201 }
    202 
    203 template<uint32_t NumVerts>
    204 INLINE
    205 void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices)
    206 {
    207     simdscalar m00 = _simd_load1_ps(&vpMatrices.m00[0]);
    208     simdscalar m30 = _simd_load1_ps(&vpMatrices.m30[0]);
    209     simdscalar m11 = _simd_load1_ps(&vpMatrices.m11[0]);
    210     simdscalar m31 = _simd_load1_ps(&vpMatrices.m31[0]);
    211     simdscalar m22 = _simd_load1_ps(&vpMatrices.m22[0]);
    212     simdscalar m32 = _simd_load1_ps(&vpMatrices.m32[0]);
    213 
    214     for (uint32_t i = 0; i < NumVerts; ++i)
    215     {
    216         v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
    217         v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
    218         v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
    219     }
    220 }
    221 
    222 template<uint32_t NumVerts>
    223 INLINE
    224 void viewportTransform(simdvector *v, const SWR_VIEWPORT_MATRICES & vpMatrices, simdscalari vViewportIdx)
    225 {
    226     // perform a gather of each matrix element based on the viewport array indexes
    227     simdscalar m00 = _simd_i32gather_ps(&vpMatrices.m00[0], vViewportIdx, 4);
    228     simdscalar m30 = _simd_i32gather_ps(&vpMatrices.m30[0], vViewportIdx, 4);
    229     simdscalar m11 = _simd_i32gather_ps(&vpMatrices.m11[0], vViewportIdx, 4);
    230     simdscalar m31 = _simd_i32gather_ps(&vpMatrices.m31[0], vViewportIdx, 4);
    231     simdscalar m22 = _simd_i32gather_ps(&vpMatrices.m22[0], vViewportIdx, 4);
    232     simdscalar m32 = _simd_i32gather_ps(&vpMatrices.m32[0], vViewportIdx, 4);
    233 
    234     for (uint32_t i = 0; i < NumVerts; ++i)
    235     {
    236         v[i].x = _simd_fmadd_ps(v[i].x, m00, m30);
    237         v[i].y = _simd_fmadd_ps(v[i].y, m11, m31);
    238         v[i].z = _simd_fmadd_ps(v[i].z, m22, m32);
    239     }
    240 }
    241 
    242 INLINE
    243 void calcBoundingBoxInt(const __m128i &vX, const __m128i &vY, SWR_RECT &bbox)
    244 {
    245     // Need horizontal fp min here
    246     __m128i vX1 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 2, 0, 1));
    247     __m128i vX2 = _mm_shuffle_epi32(vX, _MM_SHUFFLE(3, 0, 1, 2));
    248 
    249     __m128i vY1 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 2, 0, 1));
    250     __m128i vY2 = _mm_shuffle_epi32(vY, _MM_SHUFFLE(3, 0, 1, 2));
    251 
    252 
    253     __m128i vMinX = _mm_min_epi32(vX, vX1);
    254             vMinX = _mm_min_epi32(vMinX, vX2);
    255 
    256     __m128i vMaxX = _mm_max_epi32(vX, vX1);
    257             vMaxX = _mm_max_epi32(vMaxX, vX2);
    258 
    259     __m128i vMinY = _mm_min_epi32(vY, vY1);
    260             vMinY = _mm_min_epi32(vMinY, vY2);
    261 
    262     __m128i vMaxY = _mm_max_epi32(vY, vY1);
    263             vMaxY = _mm_max_epi32(vMaxY, vY2);
    264 
    265     bbox.xmin = _mm_extract_epi32(vMinX, 0);
    266     bbox.xmax = _mm_extract_epi32(vMaxX, 0);
    267     bbox.ymin = _mm_extract_epi32(vMinY, 0);
    268     bbox.ymax = _mm_extract_epi32(vMaxY, 0);
    269 }
    270 
    271 INLINE
    272 bool CanUseSimplePoints(DRAW_CONTEXT *pDC)
    273 {
    274     const API_STATE& state = GetApiState(pDC);
    275 
    276     return (state.rastState.sampleCount == SWR_MULTISAMPLE_1X &&
    277             state.rastState.pointSize == 1.0f &&
    278             !state.rastState.pointParam &&
    279             !state.rastState.pointSpriteEnable);
    280 }
    281 
    282 INLINE
    283 bool vHasNaN(const __m128& vec)
    284 {
    285     const __m128 result = _mm_cmpunord_ps(vec, vec);
    286     const int32_t mask = _mm_movemask_ps(result);
    287     return (mask != 0);
    288 }
    289 
    290 uint32_t GetNumPrims(PRIMITIVE_TOPOLOGY mode, uint32_t numElements);
    291 uint32_t NumVertsPerPrim(PRIMITIVE_TOPOLOGY topology, bool includeAdjVerts);
    292 
    293 
    294 // ProcessDraw front-end function.  All combinations of parameter values are available
    295 PFN_FE_WORK_FUNC GetProcessDrawFunc(
    296     bool IsIndexed,
    297     bool IsCutIndexEnabled,
    298     bool HasTessellation,
    299     bool HasGeometryShader,
    300     bool HasStreamOut,
    301     bool HasRasterization);
    302 
    303 void ProcessClear(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
    304 void ProcessStoreTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
    305 void ProcessDiscardInvalidateTiles(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
    306 void ProcessSync(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
    307 void ProcessShutdown(SWR_CONTEXT *pContext, DRAW_CONTEXT *pDC, uint32_t workerId, void *pUserData);
    308 
    309 PFN_PROCESS_PRIMS GetBinTrianglesFunc(bool IsConservative);
    310 
    311 struct PA_STATE_BASE;  // forward decl
    312 void BinPoints(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
    313 void BinLines(DRAW_CONTEXT *pDC, PA_STATE& pa, uint32_t workerId, simdvector prims[3], uint32_t primMask, simdscalari primID, simdscalari viewportIdx);
    314 
    315