1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file depthstencil.h 24 * 25 * @brief Implements depth/stencil functionality 26 * 27 ******************************************************************************/ 28 #pragma once 29 #include "common/os.h" 30 #include "format_conversion.h" 31 32 INLINE 33 void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps) 34 { 35 simdscalari stencil = _simd_castps_si(stencilps); 36 37 switch (op) 38 { 39 case STENCILOP_KEEP: 40 break; 41 case STENCILOP_ZERO: 42 stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask); 43 break; 44 case STENCILOP_REPLACE: 45 stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask); 46 break; 47 case STENCILOP_INCRSAT: 48 { 49 simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1)); 50 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask); 51 break; 52 } 53 case STENCILOP_DECRSAT: 54 { 55 simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1)); 56 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask); 57 break; 58 } 59 case STENCILOP_INCR: 60 { 61 simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1)); 62 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask); 63 break; 64 } 65 case STENCILOP_DECR: 66 { 67 simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff)); 68 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask); 69 break; 70 } 71 case STENCILOP_INVERT: 72 { 73 simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps())); 74 stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask); 75 break; 76 } 77 default: 78 break; 79 } 80 } 81 82 83 template<SWR_FORMAT depthFormatT> 84 simdscalar QuantizeDepth(simdscalar depth) 85 { 86 SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0); 87 uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0); 88 89 if (depthType == SWR_TYPE_FLOAT) 90 { 91 // assume only 32bit float depth supported 92 SWR_ASSERT(depthBpc == 32); 93 94 // matches shader precision, no quantizing needed 95 return depth; 96 } 97 98 // should be unorm depth if not float 99 SWR_ASSERT(depthType == SWR_TYPE_UNORM); 100 101 float quantize = (float)((1 << depthBpc) - 1); 102 simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize)); 103 result = _simd_add_ps(result, _simd_set1_ps(0.5f)); 104 result = _simd_round_ps(result, _MM_FROUND_TO_ZERO); 105 106 if (depthBpc > 16) 107 { 108 result = _simd_div_ps(result, _simd_set1_ps(quantize)); 109 } 110 else 111 { 112 result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize)); 113 } 114 115 return result; 116 } 117 118 INLINE 119 simdscalar DepthStencilTest(const API_STATE* pState, 120 bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask, 121 uint8_t *pStencilBase, simdscalar* pStencilMask) 122 { 123 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format"); 124 static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format"); 125 126 const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState; 127 const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex]; 128 129 simdscalar depthResult = _simd_set1_ps(-1.0f); 130 simdscalar zbuf; 131 132 // clamp Z to viewport [minZ..maxZ] 133 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); 134 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); 135 interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); 136 137 if (pDSState->depthTestEnable) 138 { 139 switch (pDSState->depthTestFunc) 140 { 141 case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break; 142 case ZFUNC_ALWAYS: break; 143 default: 144 zbuf = _simd_load_ps((const float*)pDepthBase); 145 } 146 147 switch (pDSState->depthTestFunc) 148 { 149 case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break; 150 case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break; 151 case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break; 152 case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break; 153 case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break; 154 case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break; 155 } 156 } 157 158 simdscalar stencilMask = _simd_set1_ps(-1.0f); 159 160 if (pDSState->stencilTestEnable) 161 { 162 uint8_t stencilRefValue; 163 uint32_t stencilTestFunc; 164 uint8_t stencilTestMask; 165 if (frontFacing || !pDSState->doubleSidedStencilTestEnable) 166 { 167 stencilRefValue = pDSState->stencilRefValue; 168 stencilTestFunc = pDSState->stencilTestFunc; 169 stencilTestMask = pDSState->stencilTestMask; 170 } 171 else 172 { 173 stencilRefValue = pDSState->backfaceStencilRefValue; 174 stencilTestFunc = pDSState->backfaceStencilTestFunc; 175 stencilTestMask = pDSState->backfaceStencilTestMask; 176 } 177 178 simdvector sbuf; 179 simdscalar stencilWithMask; 180 simdscalar stencilRef; 181 switch(stencilTestFunc) 182 { 183 case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break; 184 case ZFUNC_ALWAYS: break; 185 default: 186 LoadSOA<R8_UINT>(pStencilBase, sbuf); 187 188 // apply stencil read mask 189 stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask))); 190 191 // do stencil compare in float to avoid simd integer emulation in AVX1 192 stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask)); 193 194 stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask)); 195 break; 196 } 197 198 switch(stencilTestFunc) 199 { 200 case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break; 201 case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break; 202 case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break; 203 case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break; 204 case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break; 205 case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break; 206 } 207 } 208 209 simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask); 210 depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask); 211 212 *pStencilMask = stencilMask; 213 return depthWriteMask; 214 } 215 216 INLINE 217 void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState, 218 bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask, 219 uint8_t *pStencilBase, const simdscalar& stencilMask) 220 { 221 if (pDSState->depthWriteEnable) 222 { 223 // clamp Z to viewport [minZ..maxZ] 224 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ); 225 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ); 226 interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ)); 227 228 simdscalar vMask = _simd_and_ps(depthMask, coverageMask); 229 _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ); 230 } 231 232 if (pDSState->stencilWriteEnable) 233 { 234 simdvector sbuf; 235 LoadSOA<R8_UINT>(pStencilBase, sbuf); 236 simdscalar stencilbuf = sbuf.v[0]; 237 238 uint8_t stencilRefValue; 239 uint32_t stencilFailOp; 240 uint32_t stencilPassDepthPassOp; 241 uint32_t stencilPassDepthFailOp; 242 uint8_t stencilWriteMask; 243 if (frontFacing || !pDSState->doubleSidedStencilTestEnable) 244 { 245 stencilRefValue = pDSState->stencilRefValue; 246 stencilFailOp = pDSState->stencilFailOp; 247 stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp; 248 stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp; 249 stencilWriteMask = pDSState->stencilWriteMask; 250 } 251 else 252 { 253 stencilRefValue = pDSState->backfaceStencilRefValue; 254 stencilFailOp = pDSState->backfaceStencilFailOp; 255 stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp; 256 stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp; 257 stencilWriteMask = pDSState->backfaceStencilWriteMask; 258 } 259 260 simdscalar stencilps = stencilbuf; 261 simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue)); 262 263 simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask); 264 simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask); 265 simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1))); 266 267 simdscalar origStencil = stencilps; 268 269 StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps); 270 StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps); 271 StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps); 272 273 // apply stencil write mask 274 simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask); 275 stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask)); 276 stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps); 277 278 simdvector stencilResult; 279 stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask); 280 StoreSOA<R8_UINT>(stencilResult, pStencilBase); 281 } 282 283 } 284