1 /************************************************************************** 2 * 3 * Copyright 2012 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "pipe/p_state.h" 29 #include "util/u_debug.h" 30 31 #include "gallivm/lp_bld_type.h" 32 #include "gallivm/lp_bld_arit.h" 33 #include "gallivm/lp_bld_const.h" 34 #include "gallivm/lp_bld_logic.h" 35 #include "gallivm/lp_bld_swizzle.h" 36 #include "gallivm/lp_bld_flow.h" 37 #include "gallivm/lp_bld_debug.h" 38 #include "gallivm/lp_bld_pack.h" 39 40 #include "lp_bld_blend.h" 41 42 /** 43 * Is (a OP b) == (b OP a)? 44 */ 45 boolean 46 lp_build_blend_func_commutative(unsigned func) 47 { 48 switch (func) { 49 case PIPE_BLEND_ADD: 50 case PIPE_BLEND_MIN: 51 case PIPE_BLEND_MAX: 52 return TRUE; 53 case PIPE_BLEND_SUBTRACT: 54 case PIPE_BLEND_REVERSE_SUBTRACT: 55 return FALSE; 56 default: 57 assert(0); 58 return TRUE; 59 } 60 } 61 62 63 /** 64 * Whether the blending functions are the reverse of each other. 65 */ 66 boolean 67 lp_build_blend_func_reverse(unsigned rgb_func, unsigned alpha_func) 68 { 69 if (rgb_func == alpha_func) 70 return FALSE; 71 if (rgb_func == PIPE_BLEND_SUBTRACT && alpha_func == PIPE_BLEND_REVERSE_SUBTRACT) 72 return TRUE; 73 if (rgb_func == PIPE_BLEND_REVERSE_SUBTRACT && alpha_func == PIPE_BLEND_SUBTRACT) 74 return TRUE; 75 return FALSE; 76 } 77 78 79 /** 80 * Whether the blending factors are complementary of each other. 81 */ 82 static inline boolean 83 lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor) 84 { 85 STATIC_ASSERT((PIPE_BLENDFACTOR_ZERO ^ 0x10) == PIPE_BLENDFACTOR_ONE); 86 STATIC_ASSERT((PIPE_BLENDFACTOR_CONST_COLOR ^ 0x10) == 87 PIPE_BLENDFACTOR_INV_CONST_COLOR); 88 return dst_factor == (src_factor ^ 0x10); 89 } 90 91 92 /** 93 * Whether this is a inverse blend factor 94 */ 95 static inline boolean 96 is_inverse_factor(unsigned factor) 97 { 98 STATIC_ASSERT(PIPE_BLENDFACTOR_ZERO == 0x11); 99 return factor > 0x11; 100 } 101 102 103 /** 104 * Calculates the (expanded to wider type) multiplication 105 * of 2 normalized numbers. 106 */ 107 static void 108 lp_build_mul_norm_expand(struct lp_build_context *bld, 109 LLVMValueRef a, LLVMValueRef b, 110 LLVMValueRef *resl, LLVMValueRef *resh, 111 boolean signedness_differs) 112 { 113 const struct lp_type type = bld->type; 114 struct lp_type wide_type = lp_wider_type(type); 115 struct lp_type wide_type2 = wide_type; 116 struct lp_type type2 = type; 117 LLVMValueRef al, ah, bl, bh; 118 119 assert(lp_check_value(type, a)); 120 assert(lp_check_value(type, b)); 121 assert(!type.floating && !type.fixed && type.norm); 122 123 if (a == bld->zero || b == bld->zero) { 124 LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type)); 125 *resl = zero; 126 *resh = zero; 127 return; 128 } 129 130 if (signedness_differs) { 131 type2.sign = !type.sign; 132 wide_type2.sign = !wide_type2.sign; 133 } 134 135 lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah); 136 lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh); 137 138 *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl); 139 *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh); 140 } 141 142 143 /** 144 * @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml 145 */ 146 LLVMValueRef 147 lp_build_blend_func(struct lp_build_context *bld, 148 unsigned func, 149 LLVMValueRef term1, 150 LLVMValueRef term2) 151 { 152 switch (func) { 153 case PIPE_BLEND_ADD: 154 return lp_build_add(bld, term1, term2); 155 case PIPE_BLEND_SUBTRACT: 156 return lp_build_sub(bld, term1, term2); 157 case PIPE_BLEND_REVERSE_SUBTRACT: 158 return lp_build_sub(bld, term2, term1); 159 case PIPE_BLEND_MIN: 160 return lp_build_min(bld, term1, term2); 161 case PIPE_BLEND_MAX: 162 return lp_build_max(bld, term1, term2); 163 default: 164 assert(0); 165 return bld->zero; 166 } 167 } 168 169 170 /** 171 * Performs optimisations and blending independent of SoA/AoS 172 * 173 * @param func the blend function 174 * @param factor_src PIPE_BLENDFACTOR_xxx 175 * @param factor_dst PIPE_BLENDFACTOR_xxx 176 * @param src source rgba 177 * @param dst dest rgba 178 * @param src_factor src factor computed value 179 * @param dst_factor dst factor computed value 180 * @param not_alpha_dependent same factors accross all channels of src/dst 181 * 182 * not_alpha_dependent should be: 183 * SoA: always true as it is only one channel at a time 184 * AoS: rgb_src_factor == alpha_src_factor && rgb_dst_factor == alpha_dst_factor 185 * 186 * Note that pretty much every possible optimisation can only be done on non-unorm targets 187 * due to unorm values not going above 1.0 meaning factorisation can change results. 188 * e.g. (0.9 * 0.9) + (0.9 * 0.9) != 0.9 * (0.9 + 0.9) as result of + is always <= 1. 189 */ 190 LLVMValueRef 191 lp_build_blend(struct lp_build_context *bld, 192 unsigned func, 193 unsigned factor_src, 194 unsigned factor_dst, 195 LLVMValueRef src, 196 LLVMValueRef dst, 197 LLVMValueRef src_factor, 198 LLVMValueRef dst_factor, 199 boolean not_alpha_dependent, 200 boolean optimise_only) 201 { 202 LLVMValueRef result, src_term, dst_term; 203 204 /* If we are not alpha dependent we can mess with the src/dst factors */ 205 if (not_alpha_dependent) { 206 if (lp_build_blend_factor_complementary(factor_src, factor_dst)) { 207 if (func == PIPE_BLEND_ADD) { 208 if (factor_src < factor_dst) { 209 return lp_build_lerp(bld, src_factor, dst, src, 0); 210 } else { 211 return lp_build_lerp(bld, dst_factor, src, dst, 0); 212 } 213 } else if (bld->type.floating && func == PIPE_BLEND_SUBTRACT) { 214 result = lp_build_add(bld, src, dst); 215 216 if (factor_src < factor_dst) { 217 result = lp_build_mul(bld, result, src_factor); 218 return lp_build_sub(bld, result, dst); 219 } else { 220 result = lp_build_mul(bld, result, dst_factor); 221 return lp_build_sub(bld, src, result); 222 } 223 } else if (bld->type.floating && func == PIPE_BLEND_REVERSE_SUBTRACT) { 224 result = lp_build_add(bld, src, dst); 225 226 if (factor_src < factor_dst) { 227 result = lp_build_mul(bld, result, src_factor); 228 return lp_build_sub(bld, dst, result); 229 } else { 230 result = lp_build_mul(bld, result, dst_factor); 231 return lp_build_sub(bld, result, src); 232 } 233 } 234 } 235 236 if (bld->type.floating && factor_src == factor_dst) { 237 if (func == PIPE_BLEND_ADD || 238 func == PIPE_BLEND_SUBTRACT || 239 func == PIPE_BLEND_REVERSE_SUBTRACT) { 240 LLVMValueRef result; 241 result = lp_build_blend_func(bld, func, src, dst); 242 return lp_build_mul(bld, result, src_factor); 243 } 244 } 245 } 246 247 if (optimise_only) 248 return NULL; 249 250 if ((bld->type.norm && bld->type.sign) && 251 (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) { 252 /* 253 * With snorm blending, the inverse blend factors range from [0,2] 254 * instead of [-1,1], so the ordinary signed normalized arithmetic 255 * doesn't quite work. Unpack must be unsigned, and the add/sub 256 * must be done with wider type. 257 * (Note that it's not quite obvious what the blend equation wrt to 258 * clamping should actually be based on GL spec in this case, but 259 * really the incoming src values are clamped to [-1,1] (the dst is 260 * always clamped already), and then NO further clamping occurs until 261 * the end.) 262 */ 263 struct lp_build_context bldw; 264 struct lp_type wide_type = lp_wider_type(bld->type); 265 LLVMValueRef src_terml, src_termh, dst_terml, dst_termh; 266 LLVMValueRef resl, resh; 267 268 /* 269 * We don't need saturate math for the sub/add, since we have 270 * x+1 bit numbers in x*2 wide type (result is x+2 bits). 271 * (Doesn't really matter on x86 sse2 though as we use saturated 272 * intrinsics.) 273 */ 274 wide_type.norm = 0; 275 lp_build_context_init(&bldw, bld->gallivm, wide_type); 276 277 /* 278 * XXX This is a bit hackish. Note that -128 really should 279 * be -1.0, the same as -127. However, we did not actually clamp 280 * things anywhere (relying on pack intrinsics instead) therefore 281 * we will get -128, and the inverted factor then 255. But the mul 282 * can overflow in this case (rather the rounding fixups for the mul, 283 * -128*255 will be positive). 284 * So we clamp the src and dst up here but only when necessary (we 285 * should do this before calculating blend factors but it's enough 286 * for avoiding overflow). 287 */ 288 if (is_inverse_factor(factor_src)) { 289 src = lp_build_max(bld, src, 290 lp_build_const_vec(bld->gallivm, bld->type, -1.0)); 291 } 292 if (is_inverse_factor(factor_dst)) { 293 dst = lp_build_max(bld, dst, 294 lp_build_const_vec(bld->gallivm, bld->type, -1.0)); 295 } 296 297 lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh, 298 is_inverse_factor(factor_src) ? TRUE : FALSE); 299 lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh, 300 is_inverse_factor(factor_dst) ? TRUE : FALSE); 301 resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml); 302 resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh); 303 304 /* 305 * XXX pack2_native is not ok because the values have to be in dst 306 * range. We need native pack though for the correct order on avx2. 307 * Will break on everything not implementing clamping pack intrinsics 308 * (i.e. everything but sse2 and altivec). 309 */ 310 return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh); 311 } else { 312 src_term = lp_build_mul(bld, src, src_factor); 313 dst_term = lp_build_mul(bld, dst, dst_factor); 314 return lp_build_blend_func(bld, func, src_term, dst_term); 315 } 316 } 317 318 void 319 lp_build_alpha_to_coverage(struct gallivm_state *gallivm, 320 struct lp_type type, 321 struct lp_build_mask_context *mask, 322 LLVMValueRef alpha, 323 boolean do_branch) 324 { 325 struct lp_build_context bld; 326 LLVMValueRef test; 327 LLVMValueRef alpha_ref_value; 328 329 lp_build_context_init(&bld, gallivm, type); 330 331 alpha_ref_value = lp_build_const_vec(gallivm, type, 0.5); 332 333 test = lp_build_cmp(&bld, PIPE_FUNC_GREATER, alpha, alpha_ref_value); 334 335 lp_build_name(test, "alpha_to_coverage"); 336 337 lp_build_mask_update(mask, test); 338 339 if (do_branch) 340 lp_build_mask_check(mask); 341 } 342