/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on
 * every TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
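/*
 * Illustrative note (not in the original source): with DEBUG_EXECUTION
 * enabled, emit_dump_reg() makes the generated shader print one line per
 * written destination channel, e.g. for a 4-wide float vector something
 * like " TEMP[0].x = 1 0.5 0.25 0".  The exact formatting is whatever
 * lp_build_print_value() produces.
 */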
/*
 * Return the context for the current function.
 * (always 'main', if shader doesn't do any function calls)
 */
static inline struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * It's global, meaning that it returns true even if there's
 * no loop inside the current function, but we were inside
 * a loop inside another function, from which this one was called.
 */
static inline boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a switch statement.
 * It's global, meaning that it returns true even if there's
 * no switch in the current function, but we were inside
 * a switch inside another function, from which this one was called.
 */
static inline boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a conditional.
 * It's global, meaning that it returns true even if there's
 * no conditional in the current function, but we were inside
 * a conditional inside another function, from which this one was called.
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}
/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}
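/*
 * Illustrative example (an assumption, not from the original code): for a
 * 4-wide vector the execution mask is the bitwise AND of the component
 * masks, e.g.
 *
 *    cond_mask  = { ~0,  0, ~0, ~0 }   (inside an IF taken by lanes 0, 2, 3)
 *    break_mask = { ~0, ~0,  0, ~0 }   (lane 2 has executed BRK)
 *    exec_mask  = { ~0,  0,  0, ~0 }   (only lanes 0 and 3 still run)
 *
 * Stores then use exec_mask to select per lane between the new and the old
 * value (see lp_exec_mask_store() below).
 */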
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}
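/*
 * Sketch (illustrative, not part of the original file) of how a TGSI
 * IF/ELSE/ENDIF maps onto the three helpers above:
 *
 *    IF    -> lp_exec_mask_cond_push(mask, cond);  // AND in the condition
 *    ELSE  -> lp_exec_mask_cond_invert(mask);      // flip within parent mask
 *    ENDIF -> lp_exec_mask_cond_pop(mask);         // restore parent mask
 *
 * Both branches are emitted unconditionally; the mask decides which lanes
 * observe the stores.
 */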
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
                                           ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
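/*
 * Illustrative control flow (assumed layout, not copied from the original):
 * a BGNLOOP/ENDLOOP pair roughly generates
 *
 *     store all-ones -> break_var
 *     br bgnloop
 *   bgnloop:
 *     break_mask = load break_var
 *     ... loop body, updating cont/break masks ...
 *     looplimiter -= 1
 *     if (any lane active && looplimiter > 0) br bgnloop else br endloop
 *   endloop:
 *
 * so even "infinite" shader loops terminate after at most
 * LP_MAX_TGSI_LOOP_ITERATIONS iterations.
 */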
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's a deferred default; if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution
       * of the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != ~0u && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}
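/*
 * Example (illustrative, not from the original source): for
 *
 *    SWITCH ... CASE 0 ... BRK  DEFAULT ... BRK  CASE 1 ... ENDSWITCH
 *
 * default_analyse_is_last() returns false and sets *default_pc_start to the
 * instruction just before "CASE 1" at the same nesting level, since that is
 * where execution resumes once the default body has been deferred.
 */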
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that) everything
    * is just fine, update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such as we
       * already have updated the masks.
       * If that happens in practice we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into it,
       * we record the PC and continue execution at the next case (again,
       * cases encountered at the same time don't count). At endswitch
       * time, we update the switch mask, and go back executing the code we
       * skipped until the next break (possibly re-executing some code with
       * changed mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as in the former case, except instead of
       * skipping the code just execute it without updating the mask, then go
       * back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}


/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
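/*
 * A minimal sketch (assumption, not original code) of what the masked store
 * above expands to when a mask is active:
 *
 *    old = load dst_ptr
 *    new = select(pred, val, old)   ; per-lane select
 *    store new -> dst_ptr
 *
 * i.e. there is no per-lane control flow, just a load/select/store.
 */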
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs copy our alloca array
 * to the outputs slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}
/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
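/*
 * Worked example (illustrative, not from the original source): for a 4-wide
 * gather with indexes = {1, 5, 9, 2} and overflow_mask = {0, ~0, 0, 0},
 * index 5 is first replaced by 0, the four scalars base_ptr[1], base_ptr[0],
 * base_ptr[9], base_ptr[2] are loaded and inserted one by one, and the final
 * select zeroes lane 1, matching the D3D10 rule that out-of-bounds constant
 * buffer reads return 0.
 */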
/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
      bld_fetch = &bld_base->dbl_bld;
      break;
   case TGSI_TYPE_UNSIGNED64:
      bld_fetch = &bld_base->uint64_bld;
      break;
   case TGSI_TYPE_SIGNED64:
      bld_fetch = &bld_base->int64_bld;
      break;
   case TGSI_TYPE_VOID:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
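/*
 * Worked example (illustrative, assuming a 4-wide vector): for
 * indirect_index = {3, 3, 3, 3}, chan_index = 2 and
 * need_perelement_offset = TRUE, the function above computes
 *
 *    ((3 * 4) + 2) * 4 + {0, 1, 2, 3} = {56, 57, 58, 59}
 *
 * i.e. the per-lane float offsets of channel z of register 3 in an SoA
 * register file laid out as reg0.x[0..3], reg0.y[0..3], ...
 */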
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0u);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;
      LLVMValueRef index_vec2 = NULL;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef swizzle_vec2;
         swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1);
         index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
      }
      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      struct lp_build_context *bld_broad = &bld_base->base;
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      if (stype == TGSI_TYPE_DOUBLE) {
         LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
         bld_broad = &bld_base->dbl_bld;
      } else if (stype == TGSI_TYPE_UNSIGNED64) {
         LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
         bld_broad = &bld_base->uint64_bld;
      } else if (stype == TGSI_TYPE_SIGNED64) {
         LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
         scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
         bld_broad = &bld_base->int64_bld;
      }
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(bld_broad, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bit values.
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[16];
   int len = bld_base->base.type.length * 2;
   assert(len <= 16);

   for (i = 0; i < bld_base->base.type.length * 2; i += 2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
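/*
 * Shuffle example (illustrative, not from the original source): with a
 * 4-wide float type, len == 8 and the shuffle vector becomes
 * {0, 4, 1, 5, 2, 6, 3, 7}, interleaving input (the low halves) with
 * input2 (the high halves) before the bitcast to four 64-bit values.
 */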
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle + 1,
                                               FALSE);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            imms_ptr2 = LLVMBuildGEP(builder,
                                     bld->imms_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1);
         res2 = LLVMBuildLoad(builder, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}
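/*
 * Note (assumption based on lp_bld_quad, not stated in the original file):
 * lp_build_ddx/ddy compute the derivatives by differencing neighbouring
 * pixels within each 2x2 quad of the SoA vector, so no extra register
 * fetches are needed here.
 */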
/**
 * Predicate.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
          * is needlessly causing two comparisons due to storing the
          * intermediate result as a float vector instead of an integer mask
          * vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef pred,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[8];
   LLVMValueRef shuffles2[8];

   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, pred, temp2, chan_ptr2);
}

/**
 * Register store.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

   /*
    * Apply saturation.
    *
    * The value is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values; it normally uses MOV to do indirect stores.
       */
      assert(!tgsi_type_is_64bit(dtype));
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
             bld_base->info->file_max[reg->Register.File]);
   }

   if (DEBUG_EXECUTION) {
      emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec; /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                      chan_index + 1);
            emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                                  pred, value);
         } else
            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      if (!tgsi_type_is_64bit(dtype))
         value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      else
         value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");

      if (reg->Register.Indirect) {
         LLVMValueRef index_vec; /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMTypeRef fptr_type;

         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           chan_index,
                                           TRUE);

         fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);

         if (tgsi_type_is_64bit(dtype)) {
            LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
                                                         reg->Register.Index,
                                                         chan_index + 1);
            emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
                                  pred, value);
         }
         else
            lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   (void)dtype;
}

/*
 * Called at the beginning of the translation of each TGSI instruction, to
 * emit some debug code.
 */
static void
emit_debug(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info)

{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (DEBUG_EXECUTION) {
      /*
       * Dump the TGSI instruction.
       */

      struct gallivm_state *gallivm = bld_base->base.gallivm;
      char buf[512];
      buf[0] = '$';
      buf[1] = ' ';
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
      lp_build_printf(gallivm, buf);

      /* Dump the execution mask.
       */
      if (bld->exec_mask.has_mask) {
         lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
      }
   }
}

static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   LLVMValueRef dst[4])

{
   unsigned chan_index;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
   if(info->num_dst) {
      LLVMValueRef pred[TGSI_NUM_CHANNELS];

      emit_fetch_predicate( bld, inst, pred );

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {

         if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
            continue;
         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
      }
   }
}

static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      return PIPE_BUFFER;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      return PIPE_TEXTURE_1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      return PIPE_TEXTURE_2D;
   case TGSI_TEXTURE_3D:
      return PIPE_TEXTURE_3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      return PIPE_TEXTURE_CUBE;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      return PIPE_TEXTURE_RECT;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return PIPE_TEXTURE_1D_ARRAY;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return PIPE_TEXTURE_2D_ARRAY;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return PIPE_TEXTURE_CUBE_ARRAY;
   default:
      assert(0);
      return PIPE_BUFFER;
   }
}


static enum lp_sampler_lod_property
lp_build_lod_property(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   enum lp_sampler_lod_property lod_property;

   /*
    * Not much we can do here. We could try catching inputs declared
    * with constant interpolation but not sure it's worth it - since for
    * TEX opcodes as well as FETCH/LD the lod comes from the same reg as
    * the coords, it could only work for SAMPLE/TXQ/SVIEWINFO, just
    * like the constant/immediate recognition below.
    * What seems to be of more value would be to recognize temps holding
    * broadcasted scalars, but there's no way we can do it.
    * Tried asking llvm but without any success (using LLVMIsConstant
    * even though this isn't exactly what we'd need); even something as
    * simple as
    * IMM[0] UINT32 (0,-1,0,0)
    * MOV TEMP[0] IMM[0].yyyy
    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
    * doesn't work.
    * This means there's ZERO chance this will ever catch a scalar lod
    * with traditional tex opcodes as well as texel fetches, since the lod
    * comes from the same reg as the coords (except some test shaders using
    * constant coords maybe).
    * There's at least hope for sample opcodes as well as size queries.
    */
   if (reg->Register.File == TGSI_FILE_CONSTANT ||
       reg->Register.File == TGSI_FILE_IMMEDIATE) {
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }
   else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
      if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
      }
   }
   else {
      /* never use scalar (per-quad) lod; the results are just too wrong. */
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }
   return lod_property;
}


/**
 * High-level instruction translators.
 */

static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;
   unsigned layer_coord = 0;
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* the shadow coord lives in a separate reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord always occupies the 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * We could also check all src regs if constant, but I doubt such
       * cases exist in practice.
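       * (i.e. a TXD whose coordinate and derivative operands all come
       * from constants or immediates)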
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}

static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Unlike old-style tex opcodes, the texture/sampler indices
    * always come from src1 and src2, respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets;
    * however, the target information is NOT there and comes from the
    * declared sampler views instead.
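    * (bld->sv[] is populated from TGSI_FILE_SAMPLER_VIEW declarations
    * in lp_emit_declaration_soa further down.)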
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord always occupies the 5th slot. */
   if (compare) {
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * We could also check all src regs if constant, but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}

static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Always have a lod, except for buffers and msaa targets?
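    * (The lod here is an explicit integer value taken from src0.w,
    * fetched just below.)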
    */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /*
    * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
    * would be the sample index.
    */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   /*
    * The sampler is not actually used; set it to 0 so it won't exceed
    * PIPE_MAX_SAMPLERS and trigger some assertions with d3d10, where the
    * sampler view number can exceed this.
    */
   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}

static void
emit_size_query( struct lp_build_tgsi_soa_context *bld,
                 const struct tgsi_full_instruction *inst,
                 LLVMValueRef *sizes_out,
                 boolean is_sviewinfo)
{
   LLVMValueRef explicit_lod;
   enum lp_sampler_lod_property lod_property;
   unsigned has_lod;
   unsigned i;
   unsigned unit = inst->Src[1].Register.Index;
   unsigned target, pipe_target;
   struct lp_sampler_size_query_params params;

   if (is_sviewinfo) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      has_lod = 0;
      break;
   default:
      has_lod = 1;
      break;
   }

   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++)
         sizes_out[i] = bld->bld_base.int_bld.undef;
      return;
   }

   if (has_lod) {
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else {
      explicit_lod = NULL;
      lod_property = LP_SAMPLER_LOD_SCALAR;
   }


   pipe_target = tgsi_to_pipe_tex_target(target);

   params.int_type = bld->bld_base.int_bld.type;
   params.texture_unit = unit;
   params.target = pipe_target;
   params.context_ptr = bld->context_ptr;
   params.is_sviewinfo = TRUE;
   params.lod_property = lod_property;
   params.explicit_lod = explicit_lod;
   params.sizes_out = sizes_out;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}

static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   unsigned i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= bld->bld_base.info->num_instructions)
         return TRUE;

      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_END)
         return TRUE;

      if (opcode == TGSI_OPCODE_TEX ||
          opcode == TGSI_OPCODE_TXP ||
          opcode == TGSI_OPCODE_TXD ||
          opcode == TGSI_OPCODE_TXB ||
          opcode == TGSI_OPCODE_TXL ||
          opcode == TGSI_OPCODE_TXF ||
          opcode == TGSI_OPCODE_TXQ ||
          opcode == TGSI_OPCODE_TEX2 ||
          opcode == TGSI_OPCODE_TXB2 ||
          opcode == TGSI_OPCODE_TXL2 ||
          opcode == TGSI_OPCODE_SAMPLE ||
          opcode == TGSI_OPCODE_SAMPLE_B ||
          opcode == TGSI_OPCODE_SAMPLE_C ||
          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
          opcode == TGSI_OPCODE_SAMPLE_D ||
          opcode == TGSI_OPCODE_SAMPLE_I ||
          opcode == TGSI_OPCODE_SAMPLE_I_MS ||
          opcode == TGSI_OPCODE_SAMPLE_L ||
          opcode == TGSI_OPCODE_SVIEWINFO ||
          opcode == TGSI_OPCODE_CAL ||
          opcode == TGSI_OPCODE_CALLNZ ||
          opcode == TGSI_OPCODE_IF ||
          opcode == TGSI_OPCODE_UIF ||
          opcode == TGSI_OPCODE_BGNLOOP ||
          opcode == TGSI_OPCODE_SWITCH)
         return FALSE;
   }

   return TRUE;
}



/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kill_if(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not already been tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if(terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
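          * (Built with PIPE_FUNC_GEQUAL against zero below; the per-channel
          * masks are then AND'ed together, so a lane stays alive only if
          * all tested components are non-negative.)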
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);

         if(mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask;
      invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }

   lp_build_mask_update(bld->mask, mask);
   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Unconditional fragment kill.
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kill(struct lp_build_tgsi_soa_context *bld,
          int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Emit code which will dump the values of all the registers in the
 * given file to stdout.
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
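    * Hence the clamp below: only the first 32 registers of a file get
    * dumped.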
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0)  {
         /* This was not declared. */
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used. */
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}



void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_PREDICATE:
      assert(last < LP_MAX_TGSI_PREDS);
      for (idx = first; idx <= last; ++idx) {
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever is actually set in
       * the bound sampler views (what about the return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
   break;

   default:
      /* don't need to declare other vars */
      break;
   }
}


void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMValueRef imms[4];
   unsigned i;
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         imms[i] =
            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);

      break;
   case TGSI_IMM_FLOAT64:
   case TGSI_IMM_UINT64:
   case TGSI_IMM_INT64:
   case TGSI_IMM_UINT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   }
   for( i = size; i < 4; ++i )
      imms[i] = bld_base->base.undef;

   if (bld->use_immediates_array) {
      unsigned index = bld->num_immediates;
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = gallivm->builder;

      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
      for (i = 0; i < 4; ++i ) {
         LLVMValueRef lindex = lp_build_const_int32(
                  bld->bld_base.base.gallivm, index * 4 + i);
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                             bld->imms_array, &lindex, 1, "");
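         /* Flat SoA layout: immediate 'index', channel i lives at
          * element index*4 + i of imms_array.
          */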
         LLVMBuildStore(builder, imms[i], imm_ptr);
      }
   } else {
      /* simply copy the immediate values into the next immediates[] slot */
      unsigned i;
      assert(imm->Immediate.NrTokens - 1 <= 4);
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);

      for(i = 0; i < 4; ++i )
         bld->immediates[bld->num_immediates][i] = imms[i];

      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
         unsigned index = bld->num_immediates;
         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
         LLVMBuilderRef builder = gallivm->builder;
         for (i = 0; i < 4; ++i ) {
            LLVMValueRef lindex = lp_build_const_int32(
                     bld->bld_base.base.gallivm, index * 4 + i);
            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
            LLVMBuildStore(builder,
                           bld->immediates[index][i],
                           imm_ptr);
         }
      }
   }

   bld->num_immediates++;
}

static void
ddx_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL,
                    &emit_data->output[emit_data->chan], NULL);
}

static void
ddy_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
                    &emit_data->output[emit_data->chan]);
}

static void
kill_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill(bld, bld_base->pc - 1);
}

static void
kill_if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
}

static void
tex_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

static void
tex2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

static void
txb_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}
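/*
 * Like the wrappers above, the remaining TEX-style handlers just forward
 * to emit_tex() with a different modifier, sampler operand position and
 * sampler op; the SAMPLE-style handlers further down forward to
 * emit_sample() in the same way.
 */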
static void
txb2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

static void
txd_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
}

static void
txl_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

static void
txl2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

static void
txp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

static void
tg4_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
}

static void
txq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
}

static void
txf_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
}

static void
sample_i_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
}

static void
sample_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               FALSE, emit_data->output);
}

static void
sample_b_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
               FALSE, emit_data->output);
}

static void
sample_c_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
               TRUE, emit_data->output);
}

static void
sample_c_lz_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
               TRUE, emit_data->output);
}

static void
sample_d_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
               FALSE, emit_data->output);
}

static void
sample_l_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
               FALSE, emit_data->output);
}

static void
sviewinfo_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}
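/*
 * Combined execution mask: the shader-wide live mask AND'ed with the
 * current control-flow execution mask, when one is active.
 */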
static LLVMValueRef
mask_vec(struct lp_build_tgsi_context *bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_exec_mask *exec_mask = &bld->exec_mask;

   if (!exec_mask->has_mask) {
      return lp_build_mask_value(bld->mask);
   }
   return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
                       exec_mask->exec_mask, "");
}

static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
                          LLVMValueRef ptr,
                          LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = LLVMBuildSub(builder, current_vec, mask, "");
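   /* Active lanes of the mask are all ones, i.e. -1 as a signed integer,
    * so the subtraction above increments current_vec by one in exactly
    * those lanes.
    */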
   LLVMBuildStore(builder, current_vec, ptr);
}

static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = lp_build_select(&bld_base->uint_bld,
                                 mask,
                                 bld_base->uint_bld.zero,
                                 current_vec);

   LLVMBuildStore(builder, current_vec, ptr);
}

static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                        total_emitted_vertices_vec,
                                        bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
}

static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}


static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives; this way we make sure that end_primitive
         executes only on the paths that have unflushed vertices. */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base,
                                   bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}

static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}

static void
breakc_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef unsigned_cond =
      LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
   LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                    unsigned_cond,
                                    uint_bld->zero);

   lp_exec_break_condition(&bld->exec_mask, cond);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

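   /* emit_data->args[0] is the fetched CASE operand, i.e. the case label
    * value.
    */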

static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}
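
/*
 * emit_prologue() below backs every indirectly addressed register file
 * with an alloca'd array of channel vectors.  The layout convention,
 * visible in the input copy loop, is:
 *
 *    array[reg * 4 + chan]   <->   FILE[reg], channel chan
 *
 * which is why each array is sized file_max * 4 + 4 elements, i.e.
 * (file_max + 1) registers times 4 channels.
 */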
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs, we need to copy them into
    * our alloca array to be able to iterate over them. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
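
/*
 * Shader epilogue: dump the outputs when DEBUG_EXECUTION is enabled, then
 * either run the geometry shader epilogue (flushing any unfinished
 * primitive first) or gather the computed outputs back into the caller's
 * output slots.
 */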
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* Implicit end_primitive, needed in case there are any unflushed
       * vertices in the cache.  Note we must not call end_primitive()
       * here, since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs, copy our alloca array
       * back to the output slots specified by the caller. */
      gather_outputs(bld);
   }
}
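
/**
 * Translate a TGSI shader into LLVM IR, computing one whole SoA vector
 * per register channel.
 *
 * @param tokens    the TGSI shader to translate
 * @param type      the SoA vector type to build with
 * @param mask      execution mask for the shader
 * @param inputs    input values, indexed [register][channel]
 * @param outputs   output values, indexed [register][channel]
 * @param sampler   texture sampling interface
 * @param gs_iface  geometry shader interface; non-NULL only for geometry
 *                  shaders
 */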
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * them like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if there are too many of them we have to fall back to
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);
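
   /*
    * lp_set_default_actions_cpu() above installs the generic opcode
    * actions; the opcodes below either manipulate the execution mask
    * (control flow and kill), compute derivatives, or go through the
    * sampler interface, so they get SoA-specific emit callbacks here.
    */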
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no default value for this because it should always be set,
       * but apps using GL_EXT_geometry_shader4 quite often forget to set it,
       * so fall back to MAX_VERTEX_VARYING from that spec rather than
       * asserting. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32; /* MAX_VERTEX_VARYING */

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
   lp_exec_mask_fini(&bld.exec_mask);
}