Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file depthstencil.h
     24 *
     25 * @brief Implements depth/stencil functionality
     26 *
     27 ******************************************************************************/
     28 #pragma once
     29 #include "common/os.h"
     30 #include "format_conversion.h"
     31 
     32 INLINE
     33 void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
     34 {
     35     simdscalari stencil = _simd_castps_si(stencilps);
     36 
     37     switch (op)
     38     {
     39     case STENCILOP_KEEP:
     40         break;
     41     case STENCILOP_ZERO:
     42         stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
     43         break;
     44     case STENCILOP_REPLACE:
     45         stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
     46         break;
     47     case STENCILOP_INCRSAT:
     48     {
     49         simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
     50         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
     51         break;
     52     }
     53     case STENCILOP_DECRSAT:
     54     {
     55         simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
     56         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
     57         break;
     58     }
     59     case STENCILOP_INCR:
     60     {
     61         simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
     62         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
     63         break;
     64     }
     65     case STENCILOP_DECR:
     66     {
     67         simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
     68         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
     69         break;
     70     }
     71     case STENCILOP_INVERT:
     72     {
     73         simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
     74         stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
     75         break;
     76     }
     77     default:
     78         break;
     79     }
     80 }
     81 
     82 
     83 template<SWR_FORMAT depthFormatT>
     84 simdscalar QuantizeDepth(simdscalar const &depth)
     85 {
     86     SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
     87     uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
     88 
     89     if (depthType == SWR_TYPE_FLOAT)
     90     {
     91         // assume only 32bit float depth supported
     92         SWR_ASSERT(depthBpc == 32);
     93 
     94         // matches shader precision, no quantizing needed
     95         return depth;
     96     }
     97 
     98     // should be unorm depth if not float
     99     SWR_ASSERT(depthType == SWR_TYPE_UNORM);
    100 
    101     float quantize = (float)((1 << depthBpc) - 1);
    102     simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
    103     result = _simd_add_ps(result, _simd_set1_ps(0.5f));
    104     result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
    105 
    106     if (depthBpc > 16)
    107     {
    108         result = _simd_div_ps(result, _simd_set1_ps(quantize));
    109     }
    110     else
    111     {
    112         result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
    113     }
    114 
    115     return result;
    116 }
    117 
    118 INLINE
    119 simdscalar DepthStencilTest(const API_STATE* pState,
    120                  bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
    121                  uint8_t *pStencilBase, simdscalar* pStencilMask)
    122 {
    123     static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
    124     static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
    125 
    126     const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
    127     const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
    128 
    129     simdscalar depthResult = _simd_set1_ps(-1.0f);
    130     simdscalar zbuf;
    131 
    132     // clamp Z to viewport [minZ..maxZ]
    133     simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
    134     simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
    135     simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
    136 
    137     if (pDSState->depthTestEnable)
    138     {
    139         switch (pDSState->depthTestFunc)
    140         {
    141         case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break;
    142         case ZFUNC_ALWAYS: break;
    143         default:
    144             zbuf = _simd_load_ps((const float*)pDepthBase);
    145         }
    146 
    147         switch (pDSState->depthTestFunc)
    148         {
    149         case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
    150         case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
    151         case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
    152         case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
    153         case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
    154         case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
    155         }
    156     }
    157 
    158     simdscalar stencilMask = _simd_set1_ps(-1.0f);
    159 
    160     if (pDSState->stencilTestEnable)
    161     {
    162         uint8_t stencilRefValue;
    163         uint32_t stencilTestFunc;
    164         uint8_t stencilTestMask;
    165         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
    166         {
    167             stencilRefValue = pDSState->stencilRefValue;
    168             stencilTestFunc = pDSState->stencilTestFunc;
    169             stencilTestMask = pDSState->stencilTestMask;
    170         }
    171         else
    172         {
    173             stencilRefValue = pDSState->backfaceStencilRefValue;
    174             stencilTestFunc = pDSState->backfaceStencilTestFunc;
    175             stencilTestMask = pDSState->backfaceStencilTestMask;
    176         }
    177 
    178         simdvector sbuf;
    179         simdscalar stencilWithMask;
    180         simdscalar stencilRef;
    181         switch(stencilTestFunc)
    182         {
    183         case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break;
    184         case ZFUNC_ALWAYS: break;
    185         default:
    186             LoadSOA<R8_UINT>(pStencilBase, sbuf);
    187 
    188             // apply stencil read mask
    189             stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
    190 
    191             // do stencil compare in float to avoid simd integer emulation in AVX1
    192             stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
    193 
    194             stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
    195             break;
    196         }
    197 
    198         switch(stencilTestFunc)
    199         {
    200         case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
    201         case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
    202         case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
    203         case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
    204         case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
    205         case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
    206         }
    207     }
    208 
    209     simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
    210     depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
    211 
    212     *pStencilMask = stencilMask;
    213     return depthWriteMask;
    214 }
    215 
    216 INLINE
    217 void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
    218         bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
    219         uint8_t *pStencilBase, const simdscalar& stencilMask)
    220 {
    221     if (pDSState->depthWriteEnable)
    222     {
    223         // clamp Z to viewport [minZ..maxZ]
    224         simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
    225         simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
    226         simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
    227 
    228         simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
    229         _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
    230     }
    231 
    232     if (pDSState->stencilWriteEnable)
    233     {
    234         simdvector sbuf;
    235         LoadSOA<R8_UINT>(pStencilBase, sbuf);
    236         simdscalar stencilbuf = sbuf.v[0];
    237 
    238         uint8_t stencilRefValue;
    239         uint32_t stencilFailOp;
    240         uint32_t stencilPassDepthPassOp;
    241         uint32_t stencilPassDepthFailOp;
    242         uint8_t stencilWriteMask;
    243         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
    244         {
    245             stencilRefValue = pDSState->stencilRefValue;
    246             stencilFailOp = pDSState->stencilFailOp;
    247             stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
    248             stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
    249             stencilWriteMask = pDSState->stencilWriteMask;
    250         }
    251         else
    252         {
    253             stencilRefValue = pDSState->backfaceStencilRefValue;
    254             stencilFailOp = pDSState->backfaceStencilFailOp;
    255             stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
    256             stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
    257             stencilWriteMask = pDSState->backfaceStencilWriteMask;
    258         }
    259 
    260         simdscalar stencilps = stencilbuf;
    261         simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
    262 
    263         simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
    264         simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
    265         simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
    266 
    267         simdscalar origStencil = stencilps;
    268 
    269         StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
    270         StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
    271         StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);
    272 
    273         // apply stencil write mask
    274         simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
    275         stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
    276         stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
    277 
    278         simdvector stencilResult;
    279         stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
    280         StoreSOA<R8_UINT>(stencilResult, pStencilBase);
    281     }
    282 
    283 }
    284