/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for logical operations.
 *
 * @author Jose Fonseca <jfonseca (at) vmware.com>
 */


#include "util/u_cpu_detect.h"
#include "util/u_memory.h"
#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_init.h"
#include "lp_bld_intr.h"
#include "lp_bld_debug.h"
#include "lp_bld_logic.h"


/*
 * XXX
 *
 * Selection with vector conditional like
 *
 *    select <4 x i1> %C, %A, %B
 *
 * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
 * supported on some backends (x86) starting with llvm 3.1.
 *
 * Expanding the boolean vector to full SIMD register width, as in
 *
 *    sext <4 x i1> %C to <4 x i32>
 *
 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
 * it causes assertion failures in LLVM 2.6. It appears to work correctly on
 * LLVM 2.7.
 */


/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the ordered argument is true the function will use LLVM's ordered
 * comparisons, otherwise unordered comparisons will be used.
 * The result values will be 0 for false or ~0 for true.
 */
static LLVMValueRef
lp_build_compare_ext(struct gallivm_state *gallivm,
                     const struct lp_type type,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b,
                     boolean ordered)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
   LLVMValueRef cond;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* NEVER/ALWAYS need no IR at all: they fold to constant masks. */
   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   assert(func > PIPE_FUNC_NEVER);
   assert(func < PIPE_FUNC_ALWAYS);

   if(type.floating) {
      /* Map the PIPE_FUNC to the ordered or unordered LLVM real predicate,
       * as requested by the caller.
       */
      LLVMRealPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = ordered ? LLVMRealONE : LLVMRealUNE;
         break;
      case PIPE_FUNC_LESS:
         op = ordered ? LLVMRealOLT : LLVMRealULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = ordered ? LLVMRealOLE : LLVMRealULE;
         break;
      case PIPE_FUNC_GREATER:
         op = ordered ? LLVMRealOGT : LLVMRealUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = ordered ? LLVMRealOGE : LLVMRealUGE;
         break;
      default:
         /* Unreachable for valid PIPE_FUNC values (asserted above). */
         assert(0);
         return lp_build_undef(gallivm, type);
      }

      cond = LLVMBuildFCmp(builder, op, a, b, "");
      /* Sign-extend the i1 comparison result to a full-width 0 / ~0 mask. */
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }
   else {
      /* Integer path: signedness of the predicate follows type.sign. */
      LLVMIntPredicate op;
      switch(func) {
      case PIPE_FUNC_EQUAL:
         op = LLVMIntEQ;
         break;
      case PIPE_FUNC_NOTEQUAL:
         op = LLVMIntNE;
         break;
      case PIPE_FUNC_LESS:
         op = type.sign ? LLVMIntSLT : LLVMIntULT;
         break;
      case PIPE_FUNC_LEQUAL:
         op = type.sign ? LLVMIntSLE : LLVMIntULE;
         break;
      case PIPE_FUNC_GREATER:
         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
         break;
      case PIPE_FUNC_GEQUAL:
         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
         break;
      default:
         /* Unreachable for valid PIPE_FUNC values (asserted above). */
         assert(0);
         return lp_build_undef(gallivm, type);
      }

      cond = LLVMBuildICmp(builder, op, a, b, "");
      /* Sign-extend the i1 comparison result to a full-width 0 / ~0 mask. */
      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
   }

   return res;
}

/**
 * Build code to compare two values 'a' and 'b' of 'type' using the given func.
 * \param func one of PIPE_FUNC_x
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_compare(struct gallivm_state *gallivm,
                 const struct lp_type type,
                 unsigned func,
                 LLVMValueRef a,
                 LLVMValueRef b)
{
   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(func == PIPE_FUNC_NEVER)
      return zeros;
   if(func == PIPE_FUNC_ALWAYS)
      return ones;

   assert(func > PIPE_FUNC_NEVER);
   assert(func < PIPE_FUNC_ALWAYS);

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   /*
    * There are no unsigned integer comparison instructions in SSE.
    */

   /* This path only emits a perf warning in debug mode; codegen proceeds
    * normally through lp_build_compare_ext below either way.
    */
   if (!type.floating && !type.sign &&
       type.width * type.length == 128 &&
       util_cpu_caps.has_sse2 &&
       (func == PIPE_FUNC_LESS ||
        func == PIPE_FUNC_LEQUAL ||
        func == PIPE_FUNC_GREATER ||
        func == PIPE_FUNC_GEQUAL) &&
       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
      debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
                   __FUNCTION__, type.length, type.width);
   }
#endif

   return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
}

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * ordered comparison which means that it will return true if both
 * operands are not a NaN and the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
 */
LLVMValueRef
lp_build_cmp_ordered(struct lp_build_context *bld,
                     unsigned func,
                     LLVMValueRef a,
                     LLVMValueRef b)
{
   return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
}

/**
 * Build code to compare two values 'a' and 'b' using the given func.
 * \param func one of PIPE_FUNC_x
 * If the operands are floating point numbers, the function will use
 * unordered comparison which means that it will return true if either
 * operand is a NaN or the specified condition evaluates to true.
 * The result values will be 0 for false or ~0 for true.
236 */ 237 LLVMValueRef 238 lp_build_cmp(struct lp_build_context *bld, 239 unsigned func, 240 LLVMValueRef a, 241 LLVMValueRef b) 242 { 243 return lp_build_compare(bld->gallivm, bld->type, func, a, b); 244 } 245 246 247 /** 248 * Return (mask & a) | (~mask & b); 249 */ 250 LLVMValueRef 251 lp_build_select_bitwise(struct lp_build_context *bld, 252 LLVMValueRef mask, 253 LLVMValueRef a, 254 LLVMValueRef b) 255 { 256 LLVMBuilderRef builder = bld->gallivm->builder; 257 struct lp_type type = bld->type; 258 LLVMValueRef res; 259 260 assert(lp_check_value(type, a)); 261 assert(lp_check_value(type, b)); 262 263 if (a == b) { 264 return a; 265 } 266 267 if(type.floating) { 268 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 269 a = LLVMBuildBitCast(builder, a, int_vec_type, ""); 270 b = LLVMBuildBitCast(builder, b, int_vec_type, ""); 271 } 272 273 a = LLVMBuildAnd(builder, a, mask, ""); 274 275 /* This often gets translated to PANDN, but sometimes the NOT is 276 * pre-computed and stored in another constant. The best strategy depends 277 * on available registers, so it is not a big deal -- hopefully LLVM does 278 * the right decision attending the rest of the program. 279 */ 280 b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); 281 282 res = LLVMBuildOr(builder, a, b, ""); 283 284 if(type.floating) { 285 LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 286 res = LLVMBuildBitCast(builder, res, vec_type, ""); 287 } 288 289 return res; 290 } 291 292 293 /** 294 * Return mask ? a : b; 295 * 296 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 297 * will yield unpredictable results. 
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if(a == b)
      return a;

   if (type.length == 1) {
      /* Scalar case: truncate the 0/~0 mask down to an i1 and use a plain
       * select instruction.
       */
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (!(HAVE_LLVM == 0x0307) &&
            (LLVMIsConstant(mask) ||
             LLVMGetInstructionOpcode(mask) == LLVMSExt)) {
      /* Generate a vector select.
       *
       * Using vector selects should avoid emitting intrinsics hence avoid
       * hindering optimization passes, but vector selects weren't properly
       * supported yet for a long time, and LLVM will generate poor code when
       * the mask is not the result of a comparison.
       * Also, llvm 3.7 may miscompile them (bug 94972).
       * XXX: Even if the instruction was an SExt, this may still produce
       * terrible code. Try piglit stencil-twoside.
       */

      /* Convert the mask to a vector of booleans.
       *
       * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
       * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas
       * what really happens is that LLVM will emit two shifts back to back.
       */
      if (0) {
         /* Disabled MSB-shift variant, kept for reference (see XXX above). */
         LLVMValueRef shift = LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
         shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
         mask = LLVMBuildLShr(builder, mask, shift, "");
      }
      LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
      mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");

      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_cpu_caps.has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_cpu_caps.has_avx &&
              type.width * type.length == 256 && type.width >= 32) ||
             (util_cpu_caps.has_avx2 &&
              type.width * type.length == 256)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      /* x86 blendv path: pick the SSE4.1/AVX/AVX2 variable-blend intrinsic
       * matching the register width and element width, bitcasting operands
       * to the intrinsic's expected vector type as needed.
       */
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /*
       * There's only float blend in AVX but can just cast i32/i64
       * to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
           intrinsic = "llvm.x86.avx.blendv.pd.256";
           arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else if (type.width == 32) {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         } else {
            /* 256-bit with element width < 32 requires AVX2 pblendvb. */
            assert(util_cpu_caps.has_avx2);
            intrinsic = "llvm.x86.avx2.pblendvb";
            arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

      /* blendv argument order: (false-operand, true-operand, mask). */
      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, ARRAY_SIZE(args), 0);

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      /* Generic fallback: (mask & a) | (~mask & b). */
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}


/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
 */
LLVMValueRef
lp_build_select_aos(struct lp_build_context *bld,
                    unsigned mask,
                    LLVMValueRef a,
                    LLVMValueRef b,
                    unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;
   unsigned i, j;

   assert((mask & ~0xf) == 0);
   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   /* Trivial cases that need no IR. */
   if(a == b)
      return a;
   if((mask & 0xf) == 0xf)
      return a;
   if((mask & 0xf) == 0x0)
      return b;
   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   /*
    * There are two major ways of accomplishing this:
    * - with a shuffle
    * - with a select
    *
    * The flip between these is empirical and might need to be adjusted.
    */
   if (n <= 4) {
      /*
       * Shuffle: build a constant shuffle vector that, per channel, picks
       * element j+i from 'a' (index j+i) when the mask bit for channel i is
       * set, or from 'b' (index n+j+i, i.e. the second shuffle operand)
       * otherwise.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for(j = 0; j < n; j += num_channels)
         for(i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type,
                                           (mask & (1 << i) ? 0 : n) + j + i,
                                           0);

      return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), "");
   }
   else {
      /* Expand the writemask to a per-element 0/~0 vector and select. */
      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels);
      return lp_build_select(bld, mask_vec, a, b);
   }
}


/**
 * Return (scalar-cast)val ? true : false;
 */
LLVMValueRef
lp_build_any_true_range(struct lp_build_context *bld,
                        unsigned real_length,
                        LLVMValueRef val)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef scalar_type;
   LLVMTypeRef true_type;

   assert(real_length <= bld->type.length);

   /* Integer type wide enough to hold just the first real_length elements. */
   true_type = LLVMIntTypeInContext(bld->gallivm->context,
                                    bld->type.width * real_length);
   /* Integer type covering the whole vector's bits. */
   scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
                                      bld->type.width * bld->type.length);
   val = LLVMBuildBitCast(builder, val, scalar_type, "");
   /*
    * We're using always native types so we can use intrinsics.
    * However, if we don't do per-element calculations, we must ensure
    * the excess elements aren't used since they may contain garbage.
    */
   if (real_length < bld->type.length) {
      val = LLVMBuildTrunc(builder, val, true_type, "");
   }
   /* Any set bit in the considered range means "some element was true". */
   return LLVMBuildICmp(builder, LLVMIntNE,
                        val, LLVMConstNull(true_type), "");
}