1 /**************************************************************************** 2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * @file blend.cpp 24 * 25 * @brief Implementation for blending operations. 26 * 27 ******************************************************************************/ 28 #include "state.h" 29 30 template<bool Color, bool Alpha> 31 INLINE 32 void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out) 33 { 34 simdvector result; 35 36 switch (func) 37 { 38 case BLENDFACTOR_ZERO: 39 result.x = _simd_setzero_ps(); 40 result.y = _simd_setzero_ps(); 41 result.z = _simd_setzero_ps(); 42 result.w = _simd_setzero_ps(); 43 break; 44 45 case BLENDFACTOR_ONE: 46 result.x = _simd_set1_ps(1.0); 47 result.y = _simd_set1_ps(1.0); 48 result.z = _simd_set1_ps(1.0); 49 result.w = _simd_set1_ps(1.0); 50 break; 51 52 case BLENDFACTOR_SRC_COLOR: 53 result = src; 54 break; 55 56 case BLENDFACTOR_DST_COLOR: 57 result = dst; 58 break; 59 60 case BLENDFACTOR_INV_SRC_COLOR: 61 result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x); 62 result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y); 63 result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z); 64 result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w); 65 break; 66 67 case BLENDFACTOR_INV_DST_COLOR: 68 result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x); 69 result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y); 70 result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z); 71 result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w); 72 break; 73 74 case BLENDFACTOR_SRC_ALPHA: result.x = src.w; 75 result.y = src.w; 76 result.z = src.w; 77 result.w = src.w; 78 break; 79 80 case BLENDFACTOR_INV_SRC_ALPHA: 81 { 82 simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w); 83 result.x = oneMinusSrcA; 84 result.y = oneMinusSrcA; 85 result.z = oneMinusSrcA; 86 result.w = oneMinusSrcA; 87 break; 88 } 89 90 case BLENDFACTOR_DST_ALPHA: result.x = dst.w; 91 result.y = dst.w; 92 result.z = dst.w; 93 result.w = dst.w; 94 break; 95 96 case BLENDFACTOR_INV_DST_ALPHA: 97 { 98 simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w); 99 result.x = oneMinusDstA; 100 result.y = oneMinusDstA; 101 result.z = oneMinusDstA; 102 result.w = oneMinusDstA; 103 break; 104 } 105 106 case BLENDFACTOR_SRC_ALPHA_SATURATE: 107 { 108 simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w)); 109 result.x = sat; 110 result.y = sat; 111 result.z = sat; 112 result.w = _simd_set1_ps(1.0); 113 break; 114 } 115 116 case BLENDFACTOR_CONST_COLOR: 117 result.x = constantColor[0]; 118 result.y = constantColor[1]; 119 result.z = constantColor[2]; 120 result.w = constantColor[3]; 121 break; 122 123 case BLENDFACTOR_CONST_ALPHA: 124 result.x = result.y = result.z = result.w = constantColor[3]; 125 break; 126 127 case BLENDFACTOR_INV_CONST_COLOR: 128 { 129 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]); 130 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]); 131 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]); 132 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]); 133 break; 134 } 135 136 case BLENDFACTOR_INV_CONST_ALPHA: 137 { 138 result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]); 139 break; 140 } 141 142 case BLENDFACTOR_SRC1_COLOR: 143 result.x = src1.x; 144 result.y = src1.y; 145 result.z = src1.z; 146 result.w = src1.w; 147 break; 148 149 case BLENDFACTOR_SRC1_ALPHA: 150 result.x = result.y = result.z = result.w = src1.w; 151 break; 152 153 case BLENDFACTOR_INV_SRC1_COLOR: 154 result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x); 155 result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y); 156 result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z); 157 result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w); 158 break; 159 160 case BLENDFACTOR_INV_SRC1_ALPHA: 161 result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w); 162 break; 163 164 default: SWR_INVALID("Unimplemented blend factor: %d", func); 165 } 166 167 if (Color) 168 { 169 out.x = result.x; 170 out.y = result.y; 171 out.z = result.z; 172 } 173 if (Alpha) 174 { 175 out.w = result.w; 176 } 177 178 } 179 180 template<bool Color, bool Alpha> 181 INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out) 182 { 183 simdvector result; 184 185 switch (blendOp) 186 { 187 case BLENDOP_ADD: 188 result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x)); 189 result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y)); 190 result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z)); 191 result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w)); 192 break; 193 194 case BLENDOP_SUBTRACT: 195 result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x)); 196 result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y)); 197 result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z)); 198 result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w)); 199 break; 200 201 case BLENDOP_REVSUBTRACT: 202 result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x)); 203 result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y)); 204 result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z)); 205 result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w)); 206 break; 207 208 case BLENDOP_MIN: 209 result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x)); 210 result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y)); 211 result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z)); 212 result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w)); 213 break; 214 215 case BLENDOP_MAX: 216 result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x)); 217 result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y)); 218 result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z)); 219 result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w)); 220 break; 221 222 default: 223 SWR_INVALID("Unimplemented blend function: %d", blendOp); 224 } 225 226 if (Color) 227 { 228 out.x = result.x; 229 out.y = result.y; 230 out.z = result.z; 231 } 232 if (Alpha) 233 { 234 out.w = result.w; 235 } 236 } 237 238 template<SWR_TYPE type> 239 INLINE void Clamp(simdvector &src) 240 { 241 switch (type) 242 { 243 case SWR_TYPE_FLOAT: 244 break; 245 246 case SWR_TYPE_UNORM: 247 src.x = _simd_max_ps(src.x, _simd_setzero_ps()); 248 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f)); 249 250 src.y = _simd_max_ps(src.y, _simd_setzero_ps()); 251 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f)); 252 253 src.z = _simd_max_ps(src.z, _simd_setzero_ps()); 254 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f)); 255 256 src.w = _simd_max_ps(src.w, _simd_setzero_ps()); 257 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f)); 258 break; 259 260 case SWR_TYPE_SNORM: 261 src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f)); 262 src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f)); 263 264 src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f)); 265 src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f)); 266 267 src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f)); 268 src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f)); 269 270 src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f)); 271 src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f)); 272 break; 273 274 default: 275 SWR_INVALID("Unimplemented clamp: %d", type); 276 break; 277 } 278 } 279 280 template<SWR_TYPE type> 281 void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result) 282 { 283 // load render target 284 simdvector dst; 285 LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst); 286 287 simdvector constColor; 288 constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]); 289 constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]); 290 constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]); 291 constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]); 292 293 // clamp src/dst/constant 294 Clamp<type>(src); 295 Clamp<type>(src1); 296 Clamp<type>(dst); 297 Clamp<type>(constColor); 298 299 simdvector srcFactor, dstFactor; 300 if (pBlendState->independentAlphaBlendEnable) 301 { 302 GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor); 303 GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor); 304 305 GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor); 306 GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor); 307 308 BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result); 309 BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result); 310 } 311 else 312 { 313 GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor); 314 GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor); 315 316 BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result); 317 } 318 } 319