Home | History | Annotate | Download | only in core
      1 /****************************************************************************
      2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
      3 *
      4 * Permission is hereby granted, free of charge, to any person obtaining a
      5 * copy of this software and associated documentation files (the "Software"),
      6 * to deal in the Software without restriction, including without limitation
      7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8 * and/or sell copies of the Software, and to permit persons to whom the
      9 * Software is furnished to do so, subject to the following conditions:
     10 *
     11 * The above copyright notice and this permission notice (including the next
     12 * paragraph) shall be included in all copies or substantial portions of the
     13 * Software.
     14 *
     15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21 * IN THE SOFTWARE.
     22 *
     23 * @file blend.cpp
     24 *
     25 * @brief Implementation for blending operations.
     26 *
     27 ******************************************************************************/
     28 #include "state.h"
     29 
     30 template<bool Color, bool Alpha>
     31 INLINE
     32 void GenerateBlendFactor(SWR_BLEND_FACTOR func, simdvector &constantColor, simdvector &src, simdvector &src1, simdvector &dst, simdvector &out)
     33 {
     34     simdvector result;
     35 
     36     switch (func)
     37     {
     38     case BLENDFACTOR_ZERO:
     39         result.x = _simd_setzero_ps();
     40         result.y = _simd_setzero_ps();
     41         result.z = _simd_setzero_ps();
     42         result.w = _simd_setzero_ps();
     43         break;
     44 
     45     case BLENDFACTOR_ONE:
     46         result.x = _simd_set1_ps(1.0);
     47         result.y = _simd_set1_ps(1.0);
     48         result.z = _simd_set1_ps(1.0);
     49         result.w = _simd_set1_ps(1.0);
     50         break;
     51 
     52     case BLENDFACTOR_SRC_COLOR:
     53         result = src;
     54         break;
     55 
     56     case BLENDFACTOR_DST_COLOR:
     57         result = dst;
     58         break;
     59 
     60     case BLENDFACTOR_INV_SRC_COLOR:
     61         result.x = _simd_sub_ps(_simd_set1_ps(1.0), src.x);
     62         result.y = _simd_sub_ps(_simd_set1_ps(1.0), src.y);
     63         result.z = _simd_sub_ps(_simd_set1_ps(1.0), src.z);
     64         result.w = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
     65         break;
     66 
     67     case BLENDFACTOR_INV_DST_COLOR:
     68         result.x = _simd_sub_ps(_simd_set1_ps(1.0), dst.x);
     69         result.y = _simd_sub_ps(_simd_set1_ps(1.0), dst.y);
     70         result.z = _simd_sub_ps(_simd_set1_ps(1.0), dst.z);
     71         result.w = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
     72         break;
     73 
     74     case BLENDFACTOR_SRC_ALPHA: result.x = src.w;
     75         result.y = src.w;
     76         result.z = src.w;
     77         result.w = src.w;
     78         break;
     79 
     80     case BLENDFACTOR_INV_SRC_ALPHA:
     81     {
     82         simdscalar oneMinusSrcA = _simd_sub_ps(_simd_set1_ps(1.0), src.w);
     83         result.x = oneMinusSrcA;
     84         result.y = oneMinusSrcA;
     85         result.z = oneMinusSrcA;
     86         result.w = oneMinusSrcA;
     87         break;
     88     }
     89 
     90     case BLENDFACTOR_DST_ALPHA: result.x = dst.w;
     91         result.y = dst.w;
     92         result.z = dst.w;
     93         result.w = dst.w;
     94         break;
     95 
     96     case BLENDFACTOR_INV_DST_ALPHA:
     97     {
     98         simdscalar oneMinusDstA = _simd_sub_ps(_simd_set1_ps(1.0), dst.w);
     99         result.x = oneMinusDstA;
    100         result.y = oneMinusDstA;
    101         result.z = oneMinusDstA;
    102         result.w = oneMinusDstA;
    103         break;
    104     }
    105 
    106     case BLENDFACTOR_SRC_ALPHA_SATURATE:
    107     {
    108         simdscalar sat = _simd_min_ps(src.w, _simd_sub_ps(_simd_set1_ps(1.0), dst.w));
    109         result.x = sat;
    110         result.y = sat;
    111         result.z = sat;
    112         result.w = _simd_set1_ps(1.0);
    113         break;
    114     }
    115 
    116     case BLENDFACTOR_CONST_COLOR:
    117         result.x = constantColor[0];
    118         result.y = constantColor[1];
    119         result.z = constantColor[2];
    120         result.w = constantColor[3];
    121         break;
    122 
    123     case BLENDFACTOR_CONST_ALPHA:
    124         result.x = result.y = result.z = result.w = constantColor[3];
    125         break;
    126 
    127     case BLENDFACTOR_INV_CONST_COLOR:
    128     {
    129         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[0]);
    130         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[1]);
    131         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[2]);
    132         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
    133         break;
    134     }
    135 
    136     case BLENDFACTOR_INV_CONST_ALPHA:
    137     {
    138         result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), constantColor[3]);
    139         break;
    140     }
    141 
    142     case BLENDFACTOR_SRC1_COLOR:
    143         result.x = src1.x;
    144         result.y = src1.y;
    145         result.z = src1.z;
    146         result.w = src1.w;
    147         break;
    148 
    149     case BLENDFACTOR_SRC1_ALPHA:
    150         result.x = result.y = result.z = result.w = src1.w;
    151         break;
    152 
    153     case BLENDFACTOR_INV_SRC1_COLOR:
    154         result.x = _simd_sub_ps(_simd_set1_ps(1.0f), src1.x);
    155         result.y = _simd_sub_ps(_simd_set1_ps(1.0f), src1.y);
    156         result.z = _simd_sub_ps(_simd_set1_ps(1.0f), src1.z);
    157         result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
    158         break;
    159 
    160     case BLENDFACTOR_INV_SRC1_ALPHA:
    161         result.x = result.y = result.z = result.w = _simd_sub_ps(_simd_set1_ps(1.0f), src1.w);
    162         break;
    163 
    164     default: SWR_INVALID("Unimplemented blend factor: %d", func);
    165     }
    166 
    167     if (Color)
    168     {
    169         out.x = result.x;
    170         out.y = result.y;
    171         out.z = result.z;
    172     }
    173     if (Alpha)
    174     {
    175         out.w = result.w;
    176     }
    177 
    178 }
    179 
    180 template<bool Color, bool Alpha>
    181 INLINE void BlendFunc(SWR_BLEND_OP blendOp, simdvector &src, simdvector &srcFactor, simdvector &dst, simdvector &dstFactor, simdvector &out)
    182 {
    183     simdvector result;
    184 
    185     switch (blendOp)
    186     {
    187     case BLENDOP_ADD:
    188         result.x = _simd_fmadd_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
    189         result.y = _simd_fmadd_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
    190         result.z = _simd_fmadd_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
    191         result.w = _simd_fmadd_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
    192         break;
    193 
    194     case BLENDOP_SUBTRACT:
    195         result.x = _simd_fmsub_ps(srcFactor.x, src.x, _simd_mul_ps(dstFactor.x, dst.x));
    196         result.y = _simd_fmsub_ps(srcFactor.y, src.y, _simd_mul_ps(dstFactor.y, dst.y));
    197         result.z = _simd_fmsub_ps(srcFactor.z, src.z, _simd_mul_ps(dstFactor.z, dst.z));
    198         result.w = _simd_fmsub_ps(srcFactor.w, src.w, _simd_mul_ps(dstFactor.w, dst.w));
    199         break;
    200 
    201     case BLENDOP_REVSUBTRACT:
    202         result.x = _simd_fmsub_ps(dstFactor.x, dst.x, _simd_mul_ps(srcFactor.x, src.x));
    203         result.y = _simd_fmsub_ps(dstFactor.y, dst.y, _simd_mul_ps(srcFactor.y, src.y));
    204         result.z = _simd_fmsub_ps(dstFactor.z, dst.z, _simd_mul_ps(srcFactor.z, src.z));
    205         result.w = _simd_fmsub_ps(dstFactor.w, dst.w, _simd_mul_ps(srcFactor.w, src.w));
    206         break;
    207 
    208     case BLENDOP_MIN:
    209         result.x = _simd_min_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
    210         result.y = _simd_min_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
    211         result.z = _simd_min_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
    212         result.w = _simd_min_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
    213         break;
    214 
    215     case BLENDOP_MAX:
    216         result.x = _simd_max_ps(_simd_mul_ps(srcFactor.x, src.x), _simd_mul_ps(dstFactor.x, dst.x));
    217         result.y = _simd_max_ps(_simd_mul_ps(srcFactor.y, src.y), _simd_mul_ps(dstFactor.y, dst.y));
    218         result.z = _simd_max_ps(_simd_mul_ps(srcFactor.z, src.z), _simd_mul_ps(dstFactor.z, dst.z));
    219         result.w = _simd_max_ps(_simd_mul_ps(srcFactor.w, src.w), _simd_mul_ps(dstFactor.w, dst.w));
    220         break;
    221 
    222     default:
    223         SWR_INVALID("Unimplemented blend function: %d", blendOp);
    224     }
    225 
    226     if (Color)
    227     {
    228         out.x = result.x;
    229         out.y = result.y;
    230         out.z = result.z;
    231     }
    232     if (Alpha)
    233     {
    234         out.w = result.w;
    235     }
    236 }
    237 
    238 template<SWR_TYPE type>
    239 INLINE void Clamp(simdvector &src)
    240 {
    241     switch (type)
    242     {
    243     case SWR_TYPE_FLOAT:
    244         break;
    245 
    246     case SWR_TYPE_UNORM:
    247         src.x = _simd_max_ps(src.x, _simd_setzero_ps());
    248         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
    249 
    250         src.y = _simd_max_ps(src.y, _simd_setzero_ps());
    251         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
    252 
    253         src.z = _simd_max_ps(src.z, _simd_setzero_ps());
    254         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
    255 
    256         src.w = _simd_max_ps(src.w, _simd_setzero_ps());
    257         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
    258         break;
    259 
    260     case SWR_TYPE_SNORM:
    261         src.x = _simd_max_ps(src.x, _simd_set1_ps(-1.0f));
    262         src.x = _simd_min_ps(src.x, _simd_set1_ps(1.0f));
    263 
    264         src.y = _simd_max_ps(src.y, _simd_set1_ps(-1.0f));
    265         src.y = _simd_min_ps(src.y, _simd_set1_ps(1.0f));
    266 
    267         src.z = _simd_max_ps(src.z, _simd_set1_ps(-1.0f));
    268         src.z = _simd_min_ps(src.z, _simd_set1_ps(1.0f));
    269 
    270         src.w = _simd_max_ps(src.w, _simd_set1_ps(-1.0f));
    271         src.w = _simd_min_ps(src.w, _simd_set1_ps(1.0f));
    272         break;
    273 
    274     default:
    275         SWR_INVALID("Unimplemented clamp: %d", type);
    276         break;
    277     }
    278 }
    279 
    280 template<SWR_TYPE type>
    281 void Blend(const SWR_BLEND_STATE *pBlendState, const SWR_RENDER_TARGET_BLEND_STATE *pState, simdvector &src, simdvector& src1, uint8_t *pDst, simdvector &result)
    282 {
    283     // load render target
    284     simdvector dst;
    285     LoadSOA<KNOB_COLOR_HOT_TILE_FORMAT>(pDst, dst);
    286 
    287     simdvector constColor;
    288     constColor.x = _simd_broadcast_ss(&pBlendState->constantColor[0]);
    289     constColor.y = _simd_broadcast_ss(&pBlendState->constantColor[1]);
    290     constColor.z = _simd_broadcast_ss(&pBlendState->constantColor[2]);
    291     constColor.w = _simd_broadcast_ss(&pBlendState->constantColor[3]);
    292 
    293     // clamp src/dst/constant
    294     Clamp<type>(src);
    295     Clamp<type>(src1);
    296     Clamp<type>(dst);
    297     Clamp<type>(constColor);
    298 
    299     simdvector srcFactor, dstFactor;
    300     if (pBlendState->independentAlphaBlendEnable)
    301     {
    302         GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
    303         GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->sourceAlphaBlendFactor, constColor, src, src1, dst, srcFactor);
    304 
    305         GenerateBlendFactor<true, false>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
    306         GenerateBlendFactor<false, true>((SWR_BLEND_FACTOR)pState->destAlphaBlendFactor, constColor, src, src1, dst, dstFactor);
    307 
    308         BlendFunc<true, false>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
    309         BlendFunc<false, true>((SWR_BLEND_OP)pState->alphaBlendFunc, src, srcFactor, dst, dstFactor, result);
    310     }
    311     else
    312     {
    313         GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->sourceBlendFactor, constColor, src, src1, dst, srcFactor);
    314         GenerateBlendFactor<true, true>((SWR_BLEND_FACTOR)pState->destBlendFactor, constColor, src, src1, dst, dstFactor);
    315 
    316         BlendFunc<true, true>((SWR_BLEND_OP)pState->colorBlendFunc, src, srcFactor, dst, dstFactor, result);
    317     }
    318 }
    319