1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * @file 31 * TGSI to LLVM IR translation -- SoA. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 * 35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, 36 * Brian Paul, and others. 
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
 * instruction.
 *
 * TODO:
 * - take execution masks in consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 * \param file  TGSI register file (temp, output, ...), used for the label
 * \param index register index within the file
 * \param chan  channel 0..3, printed as x/y/z/w
 * \param value the vector value to dump at runtime
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   /* Label such as "TEMP[0].x = "; printed by the generated IR itself. */
   util_snprintf(buf, sizeof buf, " %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}

/*
 * Return the context for the current function.
107 * (always 'main', if shader doesn't do any function calls) 108 */ 109 static inline struct function_ctx * 110 func_ctx(struct lp_exec_mask *mask) 111 { 112 assert(mask->function_stack_size > 0); 113 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); 114 return &mask->function_stack[mask->function_stack_size - 1]; 115 } 116 117 /* 118 * Returns true if we're in a loop. 119 * It's global, meaning that it returns true even if there's 120 * no loop inside the current function, but we were inside 121 * a loop inside another function, from which this one was called. 122 */ 123 static inline boolean 124 mask_has_loop(struct lp_exec_mask *mask) 125 { 126 int i; 127 for (i = mask->function_stack_size - 1; i >= 0; --i) { 128 const struct function_ctx *ctx = &mask->function_stack[i]; 129 if (ctx->loop_stack_size > 0) 130 return TRUE; 131 } 132 return FALSE; 133 } 134 135 /* 136 * Returns true if we're inside a switch statement. 137 * It's global, meaning that it returns true even if there's 138 * no switch in the current function, but we were inside 139 * a switch inside another function, from which this one was called. 140 */ 141 static inline boolean 142 mask_has_switch(struct lp_exec_mask *mask) 143 { 144 int i; 145 for (i = mask->function_stack_size - 1; i >= 0; --i) { 146 const struct function_ctx *ctx = &mask->function_stack[i]; 147 if (ctx->switch_stack_size > 0) 148 return TRUE; 149 } 150 return FALSE; 151 } 152 153 /* 154 * Returns true if we're inside a conditional. 155 * It's global, meaning that it returns true even if there's 156 * no conditional in the current function, but we were inside 157 * a conditional inside another function, from which this one was called. 
 */
static inline boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}


/*
 * Initialize a function context at the specified index.
 * Resets the per-function nesting stacks and allocates the runtime
 * loop iteration limiter used to bail out of runaway loops.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      /* main() inherits the initial (all-ones) return mask */
      ctx->ret_mask = mask->ret_mask;
   }

   /* Counter decremented each loop iteration (see lp_exec_endloop) so
    * generated code cannot loop forever. */
   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

/*
 * Initialize the execution mask state: all component masks start as
 * all-ones (everything active) and the function stack holds just 'main'.
 */
static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   /* Freed in lp_exec_mask_fini(). */
   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

/* Release the function stack allocated by lp_exec_mask_init(). */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

/*
 * Recompute exec_mask as the AND of all currently relevant sub-masks
 * (cond/cont/break/switch/ret), and set has_mask accordingly.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask =
mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   /* ret mask matters inside any subroutine, or after a ret in main */
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}

/*
 * Enter an IF: save the current cond_mask and AND the condition into it.
 * \param val  per-channel condition, must already have int_vec_type.
 */
static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      /* Over-deep nesting: just count the level so the matching pop
       * balances; no mask is pushed. */
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

/*
 * ELSE: invert the current condition relative to the mask saved at the
 * matching IF (channels active in exactly one of the two branches).
 */
static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

/* ENDIF: restore the cond_mask saved by the matching push. */
static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   /* Levels beyond LP_MAX_TGSI_NESTING were only counted, never pushed. */
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}

/*
 * BGNLOOP: push loop state, allocate the break variable, emit the loop
 * header basic block and branch into it.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      /* Over-deep nesting: only count the level (matching endloop pops). */
      ++ctx->loop_stack_size;
      return;
   }

   /* break_type_stack is shared between loops and switches, indexed by
    * the combined nesting depth. */
   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   /* break_mask lives in memory so it survives across loop iterations */
   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* loop break: clear the broken channels from break_mask */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* switch break: peek at the next opcode to detect an unconditional
       * break (directly followed by CASE or ENDSWITCH). */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* jump back to the instruction after the deferred default */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

/* CONT: deactivate the continuing channels until the end of the loop. */
static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


/*
 * ENDLOOP: emit the backedge conditional branch (loop while any channel
 * is active and the iteration limiter hasn't expired), then pop loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type =
LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* one wide integer covering the whole mask vector, for a single compare */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* This level was only counted by bgnloop, nothing was pushed. */
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* backedge to the loop header, or fall through to endloop */
   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   /* restore the enclosing loop's state saved by bgnloop */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

/*
 * SWITCH: save the current switch state and start a new one with an
 * all-zero switch mask (no case has matched yet).
 */
static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      /* Over-deep nesting: only count the level. */
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   /* nothing matched yet: all channels inactive until a CASE hits */
   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

/*
 * ENDSWITCH: run a deferred DEFAULT block if one was recorded, otherwise
 * pop the saved switch state.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes the channels no case matched */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask =
LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      /* jump back to re-execute the deferred default block */
      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* just returned from executing the deferred default */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* pop the state saved by lp_exec_switch */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

/*
 * CASE: enable channels whose switch value equals caseval (keeping any
 * channels already active due to fallthrough).
 */
static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway).
    */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      /* remember which channels some case matched, for DEFAULT */
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      /* OR with current mask to implement fallthrough */
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}

/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   int curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* scan forward for the next CASE/ENDSWITCH at the same nesting level */
   while (pc != ~0u && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct
lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default handles channels no case matched; keep fallthrough ones too */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         /* skip ahead; endswitch will come back and execute the default */
         bld_base->pc = default_exec_pc;
      }
   }
}


/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 * \param bld_store  build context matching the type of val
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = mask->has_mask ?
mask->exec_mask : NULL;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   if (exec_mask) {
      /* masked store: read-modify-write via a per-channel select */
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, exec_mask, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}

/*
 * CAL: push a new function frame recording the return pc, then jump to
 * the subroutine. Silently ignored past LP_MAX_NUM_FUNCS nesting.
 */
static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

/*
 * RET: either terminate main() entirely (sets *pc = -1) or deactivate
 * the returning channels via ret_mask.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g.
after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

/* BGNSUB: nothing to do — all bookkeeping happens at the call site. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

/*
 * ENDSUB: pop the function frame, restoring the caller's pc and ret_mask.
 */
static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


/*
 * Return a pointer to the storage of a temp or output register channel.
 * Uses the flat alloca array (GEP) when the file is indirectly addressed,
 * otherwise the per-register alloca.
 */
static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             int index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      /* flat layout: 4 channels per register */
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index which temporary register
 * \param chan which channel of the temp register.
865 */ 866 LLVMValueRef 867 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld, 868 unsigned index, 869 unsigned chan) 870 { 871 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan); 872 } 873 874 /** 875 * Return pointer to a output register channel (src or dest). 876 * Note that indirect addressing cannot be handled here. 877 * \param index which output register 878 * \param chan which channel of the output register. 879 */ 880 LLVMValueRef 881 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, 882 unsigned index, 883 unsigned chan) 884 { 885 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan); 886 } 887 888 /* 889 * If we have indirect addressing in outputs copy our alloca array 890 * to the outputs slots specified by the caller to make sure 891 * our outputs are delivered consistently via the same interface. 892 */ 893 static void 894 gather_outputs(struct lp_build_tgsi_soa_context * bld) 895 { 896 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { 897 unsigned index, chan; 898 assert(bld->bld_base.info->num_outputs <= 899 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); 900 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { 901 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 902 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); 903 } 904 } 905 } 906 } 907 908 /** 909 * Gather vector. 910 * XXX the lp_build_gather() function should be capable of doing this 911 * with a little work. 
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      /* double-width result: two float vectors' worth of elements */
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ?
2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination lane; si: source lane in indexes/indexes2 */
      di = lp_build_const_int32(bld->gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(bld->gallivm, i >> 1);
      else
         si = di;

      /* with indexes2, odd result lanes come from indexes2, even from indexes */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* double case: widen mask to the double int vector before select */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}


/**
 * Scatter/store vector.
 * Stores each element of 'values' at base_ptr[indexes[i]], honoring the
 * per-channel execution mask when one is active.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
1022 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL; 1023 1024 if (0) 1025 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n", 1026 ii, val, index, scalar_ptr); 1027 1028 if (scalar_pred) { 1029 LLVMValueRef real_val, dst_val; 1030 dst_val = LLVMBuildLoad(builder, scalar_ptr, ""); 1031 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val); 1032 LLVMBuildStore(builder, real_val, scalar_ptr); 1033 } 1034 else { 1035 LLVMBuildStore(builder, val, scalar_ptr); 1036 } 1037 } 1038 } 1039 1040 1041 /** 1042 * Read the current value of the ADDR register, convert the floats to 1043 * ints, add the base index and return the vector of offsets. 1044 * The offsets will be used to index into the constant buffer or 1045 * temporary register file. 1046 */ 1047 static LLVMValueRef 1048 get_indirect_index(struct lp_build_tgsi_soa_context *bld, 1049 unsigned reg_file, unsigned reg_index, 1050 const struct tgsi_ind_register *indirect_reg) 1051 { 1052 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 1053 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; 1054 /* always use X component of address register */ 1055 unsigned swizzle = indirect_reg->Swizzle; 1056 LLVMValueRef base; 1057 LLVMValueRef rel; 1058 LLVMValueRef max_index; 1059 LLVMValueRef index; 1060 1061 assert(bld->indirect_files & (1 << reg_file)); 1062 1063 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index); 1064 1065 assert(swizzle < 4); 1066 switch (indirect_reg->File) { 1067 case TGSI_FILE_ADDRESS: 1068 rel = LLVMBuildLoad(builder, 1069 bld->addr[indirect_reg->Index][swizzle], 1070 "load addr reg"); 1071 /* ADDR LLVM values already have LLVM integer type. 
*/ 1072 break; 1073 case TGSI_FILE_TEMPORARY: 1074 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle); 1075 rel = LLVMBuildLoad(builder, rel, "load temp reg"); 1076 /* TEMP LLVM values always have LLVM float type, but for indirection, the 1077 * value actually stored is expected to be an integer */ 1078 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, ""); 1079 break; 1080 default: 1081 assert(0); 1082 rel = uint_bld->zero; 1083 } 1084 1085 index = lp_build_add(uint_bld, base, rel); 1086 1087 /* 1088 * emit_fetch_constant handles constant buffer overflow so this code 1089 * is pointless for them. 1090 * Furthermore the D3D10 spec in section 6.5 says: 1091 * If the constant buffer bound to a slot is larger than the size 1092 * declared in the shader for that slot, implementations are allowed 1093 * to return incorrect data (not necessarily 0) for indices that are 1094 * larger than the declared size but smaller than the buffer size. 1095 */ 1096 if (reg_file != TGSI_FILE_CONSTANT) { 1097 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm, 1098 uint_bld->type, 1099 bld->bld_base.info->file_max[reg_file]); 1100 1101 assert(!uint_bld->type.sign); 1102 index = lp_build_min(uint_bld, index, max_index); 1103 } 1104 1105 return index; 1106 } 1107 1108 static struct lp_build_context * 1109 stype_to_fetch(struct lp_build_tgsi_context * bld_base, 1110 enum tgsi_opcode_type stype) 1111 { 1112 struct lp_build_context *bld_fetch; 1113 1114 switch (stype) { 1115 case TGSI_TYPE_FLOAT: 1116 case TGSI_TYPE_UNTYPED: 1117 bld_fetch = &bld_base->base; 1118 break; 1119 case TGSI_TYPE_UNSIGNED: 1120 bld_fetch = &bld_base->uint_bld; 1121 break; 1122 case TGSI_TYPE_SIGNED: 1123 bld_fetch = &bld_base->int_bld; 1124 break; 1125 case TGSI_TYPE_DOUBLE: 1126 bld_fetch = &bld_base->dbl_bld; 1127 break; 1128 case TGSI_TYPE_UNSIGNED64: 1129 bld_fetch = &bld_base->uint64_bld; 1130 break; 1131 case TGSI_TYPE_SIGNED64: 1132 bld_fetch = &bld_base->int64_bld; 1133 break; 
1134 case TGSI_TYPE_VOID: 1135 default: 1136 assert(0); 1137 bld_fetch = NULL; 1138 break; 1139 } 1140 return bld_fetch; 1141 } 1142 1143 static LLVMValueRef 1144 get_soa_array_offsets(struct lp_build_context *uint_bld, 1145 LLVMValueRef indirect_index, 1146 unsigned chan_index, 1147 boolean need_perelement_offset) 1148 { 1149 struct gallivm_state *gallivm = uint_bld->gallivm; 1150 LLVMValueRef chan_vec = 1151 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index); 1152 LLVMValueRef length_vec = 1153 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length); 1154 LLVMValueRef index_vec; 1155 1156 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ 1157 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1158 index_vec = lp_build_add(uint_bld, index_vec, chan_vec); 1159 index_vec = lp_build_mul(uint_bld, index_vec, length_vec); 1160 1161 if (need_perelement_offset) { 1162 LLVMValueRef pixel_offsets; 1163 unsigned i; 1164 /* build pixel offset vector: {0, 1, 2, 3, ...} */ 1165 pixel_offsets = uint_bld->undef; 1166 for (i = 0; i < uint_bld->type.length; i++) { 1167 LLVMValueRef ii = lp_build_const_int32(gallivm, i); 1168 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets, 1169 ii, ii, ""); 1170 } 1171 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); 1172 } 1173 return index_vec; 1174 } 1175 1176 static LLVMValueRef 1177 emit_fetch_constant( 1178 struct lp_build_tgsi_context * bld_base, 1179 const struct tgsi_full_src_register * reg, 1180 enum tgsi_opcode_type stype, 1181 unsigned swizzle) 1182 { 1183 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1184 struct gallivm_state *gallivm = bld_base->base.gallivm; 1185 LLVMBuilderRef builder = gallivm->builder; 1186 struct lp_build_context *uint_bld = &bld_base->uint_bld; 1187 unsigned dimension = 0; 1188 LLVMValueRef consts_ptr; 1189 LLVMValueRef num_consts; 1190 LLVMValueRef res; 1191 1192 /* XXX: Handle fetching xyzw 
components as a vector */ 1193 assert(swizzle != ~0u); 1194 1195 if (reg->Register.Dimension) { 1196 assert(!reg->Dimension.Indirect); 1197 dimension = reg->Dimension.Index; 1198 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS); 1199 } 1200 1201 consts_ptr = bld->consts[dimension]; 1202 num_consts = bld->consts_sizes[dimension]; 1203 1204 if (reg->Register.Indirect) { 1205 LLVMValueRef indirect_index; 1206 LLVMValueRef swizzle_vec = 1207 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle); 1208 LLVMValueRef index_vec; /* index into the const buffer */ 1209 LLVMValueRef overflow_mask; 1210 LLVMValueRef index_vec2 = NULL; 1211 1212 indirect_index = get_indirect_index(bld, 1213 reg->Register.File, 1214 reg->Register.Index, 1215 ®->Indirect); 1216 1217 /* All fetches are from the same constant buffer, so 1218 * we need to propagate the size to a vector to do a 1219 * vector comparison */ 1220 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts); 1221 /* Construct a boolean vector telling us which channels 1222 * overflow the bound constant buffer */ 1223 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL, 1224 indirect_index, num_consts); 1225 1226 /* index_vec = indirect_index * 4 + swizzle */ 1227 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); 1228 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec); 1229 1230 if (tgsi_type_is_64bit(stype)) { 1231 LLVMValueRef swizzle_vec2; 1232 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle + 1); 1233 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2); 1234 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2); 1235 } 1236 /* Gather values from the constant buffer */ 1237 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2); 1238 } 1239 else { 1240 LLVMValueRef index; /* index into the const buffer */ 1241 LLVMValueRef scalar, scalar_ptr; 1242 struct lp_build_context *bld_broad = &bld_base->base; 1243 index = 
lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle); 1244 1245 scalar_ptr = LLVMBuildGEP(builder, consts_ptr, 1246 &index, 1, ""); 1247 if (stype == TGSI_TYPE_DOUBLE) { 1248 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0); 1249 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, ""); 1250 bld_broad = &bld_base->dbl_bld; 1251 } else if (stype == TGSI_TYPE_UNSIGNED64) { 1252 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); 1253 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, ""); 1254 bld_broad = &bld_base->uint64_bld; 1255 } else if (stype == TGSI_TYPE_SIGNED64) { 1256 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0); 1257 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, ""); 1258 bld_broad = &bld_base->int64_bld; 1259 } 1260 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 1261 res = lp_build_broadcast_scalar(bld_broad, scalar); 1262 } 1263 1264 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) { 1265 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1266 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1267 } 1268 1269 return res; 1270 } 1271 1272 /** 1273 * Fetch 64-bit values from two separate channels. 1274 * 64-bit values are stored split across two channels, like xy and zw. 1275 * This function creates a set of vec_length*2 floats, 1276 * extracts the values from the two channels, 1277 * puts them in the correct place, then casts to vec_length 64-bits. 
1278 */ 1279 static LLVMValueRef 1280 emit_fetch_64bit( 1281 struct lp_build_tgsi_context * bld_base, 1282 enum tgsi_opcode_type stype, 1283 LLVMValueRef input, 1284 LLVMValueRef input2) 1285 { 1286 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1287 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1288 LLVMBuilderRef builder = gallivm->builder; 1289 LLVMValueRef res; 1290 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1291 int i; 1292 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)]; 1293 int len = bld_base->base.type.length * 2; 1294 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32))); 1295 1296 for (i = 0; i < bld_base->base.type.length * 2; i+=2) { 1297 shuffles[i] = lp_build_const_int32(gallivm, i / 2); 1298 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length); 1299 } 1300 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), ""); 1301 1302 return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1303 } 1304 1305 static LLVMValueRef 1306 emit_fetch_immediate( 1307 struct lp_build_tgsi_context * bld_base, 1308 const struct tgsi_full_src_register * reg, 1309 enum tgsi_opcode_type stype, 1310 unsigned swizzle) 1311 { 1312 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1313 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1314 LLVMBuilderRef builder = gallivm->builder; 1315 LLVMValueRef res = NULL; 1316 1317 if (bld->use_immediates_array || reg->Register.Indirect) { 1318 LLVMValueRef imms_array; 1319 LLVMTypeRef fptr_type; 1320 1321 /* cast imms_array pointer to float* */ 1322 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1323 imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, ""); 1324 1325 if (reg->Register.Indirect) { 1326 LLVMValueRef indirect_index; 1327 LLVMValueRef index_vec; /* index into the immediate register array */ 1328 LLVMValueRef index_vec2 = NULL; 1329 
indirect_index = get_indirect_index(bld, 1330 reg->Register.File, 1331 reg->Register.Index, 1332 ®->Indirect); 1333 /* 1334 * Unlike for other reg classes, adding pixel offsets is unnecessary - 1335 * immediates are stored as full vectors (FIXME??? - might be better 1336 * to store them the same as constants) but all elements are the same 1337 * in any case. 1338 */ 1339 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1340 indirect_index, 1341 swizzle, 1342 FALSE); 1343 if (tgsi_type_is_64bit(stype)) 1344 index_vec2 = get_soa_array_offsets(&bld_base->uint_bld, 1345 indirect_index, 1346 swizzle + 1, 1347 FALSE); 1348 /* Gather values from the immediate register array */ 1349 res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2); 1350 } else { 1351 LLVMValueRef lindex = lp_build_const_int32(gallivm, 1352 reg->Register.Index * 4 + swizzle); 1353 LLVMValueRef imms_ptr = LLVMBuildGEP(builder, 1354 bld->imms_array, &lindex, 1, ""); 1355 res = LLVMBuildLoad(builder, imms_ptr, ""); 1356 1357 if (tgsi_type_is_64bit(stype)) { 1358 LLVMValueRef lindex1; 1359 LLVMValueRef imms_ptr2; 1360 LLVMValueRef res2; 1361 1362 lindex1 = lp_build_const_int32(gallivm, 1363 reg->Register.Index * 4 + swizzle + 1); 1364 imms_ptr2 = LLVMBuildGEP(builder, 1365 bld->imms_array, &lindex1, 1, ""); 1366 res2 = LLVMBuildLoad(builder, imms_ptr2, ""); 1367 res = emit_fetch_64bit(bld_base, stype, res, res2); 1368 } 1369 } 1370 } 1371 else { 1372 res = bld->immediates[reg->Register.Index][swizzle]; 1373 if (tgsi_type_is_64bit(stype)) 1374 res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle + 1]); 1375 } 1376 1377 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) { 1378 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1379 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1380 } 1381 return res; 1382 } 1383 1384 static LLVMValueRef 1385 emit_fetch_input( 1386 struct 
 lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);
      if (tgsi_type_is_64bit(stype)) {
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle + 1,
                                            TRUE);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs were spilled to an array because some input register is
          * addressed indirectly somewhere in this shader. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad(builder, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + swizzle + 1);
            input_ptr2 = LLVMBuildGEP(builder,
                                      bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad(builder, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle + 1]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}


/*
 * Fetch one channel of a geometry shader input, addressed by both a
 * vertex index and an attribute index (either may be indirect).
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
reg->Register.File, 1502 reg->Dimension.Index, 1503 ®->DimIndirect); 1504 } else { 1505 vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); 1506 } 1507 1508 res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, 1509 reg->Dimension.Indirect, 1510 vertex_index, 1511 reg->Register.Indirect, 1512 attrib_index, 1513 swizzle_index); 1514 1515 assert(res); 1516 if (tgsi_type_is_64bit(stype)) { 1517 LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle + 1); 1518 LLVMValueRef res2; 1519 res2 = bld->gs_iface->fetch_input(bld->gs_iface, bld_base, 1520 reg->Dimension.Indirect, 1521 vertex_index, 1522 reg->Register.Indirect, 1523 attrib_index, 1524 swizzle_index); 1525 assert(res2); 1526 res = emit_fetch_64bit(bld_base, stype, res, res2); 1527 } else if (stype == TGSI_TYPE_UNSIGNED) { 1528 res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); 1529 } else if (stype == TGSI_TYPE_SIGNED) { 1530 res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); 1531 } 1532 1533 return res; 1534 } 1535 1536 static LLVMValueRef 1537 emit_fetch_temporary( 1538 struct lp_build_tgsi_context * bld_base, 1539 const struct tgsi_full_src_register * reg, 1540 enum tgsi_opcode_type stype, 1541 unsigned swizzle) 1542 { 1543 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1544 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1545 LLVMBuilderRef builder = gallivm->builder; 1546 LLVMValueRef res; 1547 1548 if (reg->Register.Indirect) { 1549 LLVMValueRef indirect_index; 1550 LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */ 1551 LLVMValueRef temps_array; 1552 LLVMTypeRef fptr_type; 1553 1554 indirect_index = get_indirect_index(bld, 1555 reg->Register.File, 1556 reg->Register.Index, 1557 ®->Indirect); 1558 1559 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1560 indirect_index, 1561 swizzle, 1562 TRUE); 1563 if (tgsi_type_is_64bit(stype)) { 1564 index_vec2 = 
get_soa_array_offsets(&bld_base->uint_bld, 1565 indirect_index, 1566 swizzle + 1, 1567 TRUE); 1568 } 1569 1570 /* cast temps_array pointer to float* */ 1571 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1572 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); 1573 1574 /* Gather values from the temporary register array */ 1575 res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2); 1576 } 1577 else { 1578 LLVMValueRef temp_ptr; 1579 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle); 1580 res = LLVMBuildLoad(builder, temp_ptr, ""); 1581 1582 if (tgsi_type_is_64bit(stype)) { 1583 LLVMValueRef temp_ptr2, res2; 1584 1585 temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle + 1); 1586 res2 = LLVMBuildLoad(builder, temp_ptr2, ""); 1587 res = emit_fetch_64bit(bld_base, stype, res, res2); 1588 } 1589 } 1590 1591 if (stype == TGSI_TYPE_SIGNED || 1592 stype == TGSI_TYPE_UNSIGNED || 1593 stype == TGSI_TYPE_DOUBLE || 1594 stype == TGSI_TYPE_SIGNED64 || 1595 stype == TGSI_TYPE_UNSIGNED64) { 1596 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype); 1597 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, ""); 1598 } 1599 1600 return res; 1601 } 1602 1603 static LLVMValueRef 1604 emit_fetch_system_value( 1605 struct lp_build_tgsi_context * bld_base, 1606 const struct tgsi_full_src_register * reg, 1607 enum tgsi_opcode_type stype, 1608 unsigned swizzle) 1609 { 1610 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1611 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 1612 const struct tgsi_shader_info *info = bld->bld_base.info; 1613 LLVMBuilderRef builder = gallivm->builder; 1614 LLVMValueRef res; 1615 enum tgsi_opcode_type atype; // Actual type of the value 1616 1617 assert(!reg->Register.Indirect); 1618 1619 switch (info->system_value_semantic_name[reg->Register.Index]) { 1620 case TGSI_SEMANTIC_INSTANCEID: 1621 res = 
 lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   /* Bitcast from the actual stored type to the requested type. */
   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   /* Any of res/ddx/ddy may be NULL when the caller doesn't need it. */
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}

/**
 * store an array of vec-length 64-bit into two arrays of vec_length floats
 * i.e.
 * value is d0, d1, d2, d3 etc.
 * each 64-bit has high and low pieces x, y
 * so gets stored into the separate channels as:
 * chan_ptr = d0.x, d1.x, d2.x, d3.x
 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];

   /* De-interleave: even float elements go to chan_ptr, odd ones to
    * chan_ptr2. */
   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}

/**
 * Register store.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   if (inst->Instruction.Saturate) {
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_clamp_zero_one_nanzero(float_bld, value);
   }

   if (reg->Register.Indirect) {
      /*
       * Currently the mesa/st doesn't generate indirect stores
       * to 64-bit values, it normally uses MOV to do indirect stores.
1772 */ 1773 assert(!tgsi_type_is_64bit(dtype)); 1774 indirect_index = get_indirect_index(bld, 1775 reg->Register.File, 1776 reg->Register.Index, 1777 ®->Indirect); 1778 } else { 1779 assert(reg->Register.Index <= 1780 bld_base->info->file_max[reg->Register.File]); 1781 } 1782 1783 if (DEBUG_EXECUTION) { 1784 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value); 1785 } 1786 1787 switch( reg->Register.File ) { 1788 case TGSI_FILE_OUTPUT: 1789 /* Outputs are always stored as floats */ 1790 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); 1791 1792 if (reg->Register.Indirect) { 1793 LLVMValueRef index_vec; /* indexes into the output registers */ 1794 LLVMValueRef outputs_array; 1795 LLVMTypeRef fptr_type; 1796 1797 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1798 indirect_index, 1799 chan_index, 1800 TRUE); 1801 1802 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1803 outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, ""); 1804 1805 /* Scatter store values into output registers */ 1806 emit_mask_scatter(bld, outputs_array, index_vec, value, 1807 &bld->exec_mask); 1808 } 1809 else { 1810 LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, 1811 chan_index); 1812 1813 if (tgsi_type_is_64bit(dtype)) { 1814 LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index, 1815 chan_index + 1); 1816 emit_store_64bit_chan(bld_base, out_ptr, out_ptr2, 1817 value); 1818 } else 1819 lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr); 1820 } 1821 break; 1822 1823 case TGSI_FILE_TEMPORARY: 1824 /* Temporaries are always stored as floats */ 1825 if (!tgsi_type_is_64bit(dtype)) 1826 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); 1827 else 1828 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), ""); 1829 1830 if (reg->Register.Indirect) { 1831 
LLVMValueRef index_vec; /* indexes into the temp registers */ 1832 LLVMValueRef temps_array; 1833 LLVMTypeRef fptr_type; 1834 1835 index_vec = get_soa_array_offsets(&bld_base->uint_bld, 1836 indirect_index, 1837 chan_index, 1838 TRUE); 1839 1840 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); 1841 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); 1842 1843 /* Scatter store values into temp registers */ 1844 emit_mask_scatter(bld, temps_array, index_vec, value, 1845 &bld->exec_mask); 1846 } 1847 else { 1848 LLVMValueRef temp_ptr; 1849 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); 1850 1851 if (tgsi_type_is_64bit(dtype)) { 1852 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld, 1853 reg->Register.Index, 1854 chan_index + 1); 1855 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2, 1856 value); 1857 } 1858 else 1859 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr); 1860 } 1861 break; 1862 1863 case TGSI_FILE_ADDRESS: 1864 assert(dtype == TGSI_TYPE_SIGNED); 1865 assert(LLVMTypeOf(value) == int_bld->vec_type); 1866 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, ""); 1867 lp_exec_mask_store(&bld->exec_mask, int_bld, value, 1868 bld->addr[reg->Register.Index][chan_index]); 1869 break; 1870 1871 default: 1872 assert( 0 ); 1873 } 1874 1875 (void)dtype; 1876 } 1877 1878 /* 1879 * Called at the beginning of the translation of each TGSI instruction, to 1880 * emit some debug code. 1881 */ 1882 static void 1883 emit_debug( 1884 struct lp_build_tgsi_context * bld_base, 1885 const struct tgsi_full_instruction * inst, 1886 const struct tgsi_opcode_info * info) 1887 1888 { 1889 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 1890 1891 if (DEBUG_EXECUTION) { 1892 /* 1893 * Dump the TGSI instruction. 
       */

      struct gallivm_state *gallivm = bld_base->base.gallivm;
      char buf[512];
      buf[0] = '$';
      buf[1] = ' ';
      tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
      /* NOTE(review): buf is passed as the printf format string; a
       * literal '%' in the disassembled instruction text would be
       * misinterpreted.  Harmless for normal TGSI dumps, but worth
       * confirming against lp_build_printf's contract. */
      lp_build_printf(gallivm, buf);

      /* Dump the execution mask.
       */
      if (bld->exec_mask.has_mask) {
         lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
      }
   }
}

/*
 * Store an instruction destination: loop over the write mask and store
 * each enabled channel.  64-bit types occupy channel pairs, so their odd
 * channels are skipped (the even-channel store covers both).
 */
static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   unsigned index,
   LLVMValueRef dst[4])

{
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

   unsigned writemask = inst->Dst[index].Register.WriteMask;
   while (writemask) {
      unsigned chan_index = u_bit_scan(&writemask);
      if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
         continue;
      emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
   }
}

/*
 * Translate a TGSI texture target to the corresponding pipe texture
 * target.
 */
static unsigned
tgsi_to_pipe_tex_target(unsigned tgsi_target)
{
   switch (tgsi_target) {
   case TGSI_TEXTURE_BUFFER:
      return PIPE_BUFFER;
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
      return PIPE_TEXTURE_1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_2D_MSAA:
      return PIPE_TEXTURE_2D;
   case TGSI_TEXTURE_3D:
      return PIPE_TEXTURE_3D;
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_SHADOWCUBE:
      return PIPE_TEXTURE_CUBE;
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
      return PIPE_TEXTURE_RECT;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return PIPE_TEXTURE_1D_ARRAY;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return PIPE_TEXTURE_2D_ARRAY;
   case TGSI_TEXTURE_CUBE_ARRAY:
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      return PIPE_TEXTURE_CUBE_ARRAY;
   default:
      assert(0);
      return PIPE_BUFFER;
   }
}


/*
 * Decide whether the lod argument of a sampling instruction is scalar,
 * per-quad or per-element.
 */
static enum lp_sampler_lod_property
lp_build_lod_property(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned src_op)
{
   const struct tgsi_full_src_register *reg = &inst->Src[src_op];
   enum lp_sampler_lod_property lod_property;

   /*
    * Not much we can do here. We could try catching inputs declared
    * with constant interpolation but not sure it's worth it - since for
    * TEX opcodes as well as FETCH/LD the lod comes from same reg as
    * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
    * like the constant/immediate recognition below.
    * What seems to be of more value would be to recognize temps holding
    * broadcasted scalars but no way we can do it.
    * Tried asking llvm but without any success (using LLVMIsConstant
    * even though this isn't exactly what we'd need), even as simple as
    * IMM[0] UINT32 (0,-1,0,0)
    * MOV TEMP[0] IMM[0].yyyy
    * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
    * doesn't work.
    * This means there's ZERO chance this will ever catch a scalar lod
    * with traditional tex opcodes as well as texel fetches, since the lod
    * comes from the same reg as coords (except some test shaders using
    * constant coords maybe).
    * There's at least hope for sample opcodes as well as size queries.
1997 */ 1998 if (reg->Register.File == TGSI_FILE_CONSTANT || 1999 reg->Register.File == TGSI_FILE_IMMEDIATE) { 2000 lod_property = LP_SAMPLER_LOD_SCALAR; 2001 } 2002 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) { 2003 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { 2004 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2005 } 2006 else { 2007 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2008 } 2009 } 2010 else { 2011 /* never use scalar (per-quad) lod the results are just too wrong. */ 2012 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2013 } 2014 return lod_property; 2015 } 2016 2017 2018 /** 2019 * High-level instruction translators. 2020 */ 2021 2022 static void 2023 emit_tex( struct lp_build_tgsi_soa_context *bld, 2024 const struct tgsi_full_instruction *inst, 2025 enum lp_build_tex_modifier modifier, 2026 LLVMValueRef *texel, 2027 unsigned sampler_reg, 2028 enum lp_sampler_op_type sampler_op) 2029 { 2030 unsigned unit = inst->Src[sampler_reg].Register.Index; 2031 LLVMValueRef oow = NULL; 2032 LLVMValueRef lod = NULL; 2033 LLVMValueRef coords[5]; 2034 LLVMValueRef offsets[3] = { NULL }; 2035 struct lp_derivatives derivs; 2036 struct lp_sampler_params params; 2037 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2038 unsigned num_derivs, num_offsets, i; 2039 unsigned shadow_coord = 0; 2040 unsigned layer_coord = 0; 2041 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT; 2042 2043 memset(¶ms, 0, sizeof(params)); 2044 2045 if (!bld->sampler) { 2046 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2047 for (i = 0; i < 4; i++) { 2048 texel[i] = bld->bld_base.base.undef; 2049 } 2050 return; 2051 } 2052 2053 switch (inst->Texture.Texture) { 2054 case TGSI_TEXTURE_1D_ARRAY: 2055 layer_coord = 1; 2056 /* fallthrough */ 2057 case TGSI_TEXTURE_1D: 2058 num_offsets = 1; 2059 num_derivs = 1; 2060 break; 2061 case TGSI_TEXTURE_2D_ARRAY: 2062 layer_coord = 2; 2063 /* fallthrough */ 2064 case 
TGSI_TEXTURE_2D: 2065 case TGSI_TEXTURE_RECT: 2066 num_offsets = 2; 2067 num_derivs = 2; 2068 break; 2069 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2070 layer_coord = 1; 2071 /* fallthrough */ 2072 case TGSI_TEXTURE_SHADOW1D: 2073 shadow_coord = 2; 2074 num_offsets = 1; 2075 num_derivs = 1; 2076 break; 2077 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2078 layer_coord = 2; 2079 shadow_coord = 3; 2080 num_offsets = 2; 2081 num_derivs = 2; 2082 break; 2083 case TGSI_TEXTURE_SHADOW2D: 2084 case TGSI_TEXTURE_SHADOWRECT: 2085 shadow_coord = 2; 2086 num_offsets = 2; 2087 num_derivs = 2; 2088 break; 2089 case TGSI_TEXTURE_CUBE: 2090 num_offsets = 2; 2091 num_derivs = 3; 2092 break; 2093 case TGSI_TEXTURE_3D: 2094 num_offsets = 3; 2095 num_derivs = 3; 2096 break; 2097 case TGSI_TEXTURE_SHADOWCUBE: 2098 shadow_coord = 3; 2099 num_offsets = 2; 2100 num_derivs = 3; 2101 break; 2102 case TGSI_TEXTURE_CUBE_ARRAY: 2103 num_offsets = 2; 2104 num_derivs = 3; 2105 layer_coord = 3; 2106 break; 2107 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 2108 num_offsets = 2; 2109 num_derivs = 3; 2110 layer_coord = 3; 2111 shadow_coord = 4; /* shadow coord special different reg */ 2112 break; 2113 case TGSI_TEXTURE_2D_MSAA: 2114 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2115 default: 2116 assert(0); 2117 return; 2118 } 2119 2120 /* Note lod and especially projected are illegal in a LOT of cases */ 2121 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || 2122 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2123 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 2124 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) { 2125 /* note that shadow cube array with bias/explicit lod does not exist */ 2126 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); 2127 } 2128 else { 2129 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2130 } 2131 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 2132 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; 2133 } 2134 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2135 
sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2136 } 2137 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2138 } 2139 2140 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) { 2141 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2142 oow = lp_build_rcp(&bld->bld_base.base, oow); 2143 } 2144 2145 for (i = 0; i < num_derivs; i++) { 2146 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2147 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2148 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow); 2149 } 2150 for (i = num_derivs; i < 5; i++) { 2151 coords[i] = bld->bld_base.base.undef; 2152 } 2153 2154 /* Layer coord always goes into 3rd slot, except for cube map arrays */ 2155 if (layer_coord) { 2156 if (layer_coord == 3) { 2157 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2158 } 2159 else { 2160 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2161 } 2162 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2163 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow); 2164 } 2165 /* Shadow coord occupies always 5th slot. 
*/ 2166 if (shadow_coord) { 2167 sample_key |= LP_SAMPLER_SHADOW; 2168 if (shadow_coord == 4) { 2169 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0); 2170 } 2171 else { 2172 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord); 2173 } 2174 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) 2175 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow); 2176 } 2177 2178 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 2179 unsigned dim; 2180 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; 2181 for (dim = 0; dim < num_derivs; ++dim) { 2182 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim); 2183 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim); 2184 } 2185 params.derivs = &derivs; 2186 /* 2187 * could also check all src regs if constant but I doubt such 2188 * cases exist in practice. 2189 */ 2190 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { 2191 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { 2192 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2193 } 2194 else { 2195 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2196 } 2197 } 2198 else { 2199 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2200 } 2201 } 2202 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2203 2204 /* we don't handle the 4 offset version of tg4 */ 2205 if (inst->Texture.NumOffsets == 1) { 2206 unsigned dim; 2207 sample_key |= LP_SAMPLER_OFFSETS; 2208 for (dim = 0; dim < num_offsets; dim++) { 2209 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2210 } 2211 } 2212 2213 params.type = bld->bld_base.base.type; 2214 params.sample_key = sample_key; 2215 params.texture_index = unit; 2216 params.sampler_index = unit; 2217 params.context_ptr = bld->context_ptr; 2218 params.thread_data_ptr = bld->thread_data_ptr; 2219 params.coords = coords; 2220 params.offsets = offsets; 2221 params.lod = lod; 2222 params.texel = texel; 2223 2224 
bld->sampler->emit_tex_sample(bld->sampler, 2225 bld->bld_base.base.gallivm, 2226 ¶ms); 2227 } 2228 2229 static void 2230 emit_sample(struct lp_build_tgsi_soa_context *bld, 2231 const struct tgsi_full_instruction *inst, 2232 enum lp_build_tex_modifier modifier, 2233 boolean compare, 2234 enum lp_sampler_op_type sample_type, 2235 LLVMValueRef *texel) 2236 { 2237 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 2238 unsigned texture_unit, sampler_unit; 2239 LLVMValueRef lod = NULL; 2240 LLVMValueRef coords[5]; 2241 LLVMValueRef offsets[3] = { NULL }; 2242 struct lp_derivatives derivs; 2243 struct lp_sampler_params params; 2244 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2245 2246 unsigned num_offsets, num_derivs, i; 2247 unsigned layer_coord = 0; 2248 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT; 2249 2250 memset(¶ms, 0, sizeof(params)); 2251 2252 if (!bld->sampler) { 2253 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2254 for (i = 0; i < 4; i++) { 2255 texel[i] = bld->bld_base.base.undef; 2256 } 2257 return; 2258 } 2259 2260 /* 2261 * unlike old-style tex opcodes the texture/sampler indices 2262 * always come from src1 and src2 respectively. 2263 */ 2264 texture_unit = inst->Src[1].Register.Index; 2265 sampler_unit = inst->Src[2].Register.Index; 2266 2267 /* 2268 * Note inst->Texture.Texture will contain the number of offsets, 2269 * however the target information is NOT there and comes from the 2270 * declared sampler views instead. 
2271 */ 2272 switch (bld->sv[texture_unit].Resource) { 2273 case TGSI_TEXTURE_1D: 2274 num_offsets = 1; 2275 num_derivs = 1; 2276 break; 2277 case TGSI_TEXTURE_1D_ARRAY: 2278 layer_coord = 1; 2279 num_offsets = 1; 2280 num_derivs = 1; 2281 break; 2282 case TGSI_TEXTURE_2D: 2283 case TGSI_TEXTURE_RECT: 2284 num_offsets = 2; 2285 num_derivs = 2; 2286 break; 2287 case TGSI_TEXTURE_2D_ARRAY: 2288 layer_coord = 2; 2289 num_offsets = 2; 2290 num_derivs = 2; 2291 break; 2292 case TGSI_TEXTURE_CUBE: 2293 num_offsets = 2; 2294 num_derivs = 3; 2295 break; 2296 case TGSI_TEXTURE_3D: 2297 num_offsets = 3; 2298 num_derivs = 3; 2299 break; 2300 case TGSI_TEXTURE_CUBE_ARRAY: 2301 layer_coord = 3; 2302 num_offsets = 2; 2303 num_derivs = 3; 2304 break; 2305 default: 2306 assert(0); 2307 return; 2308 } 2309 2310 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS || 2311 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2312 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); 2313 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) { 2314 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT; 2315 } 2316 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) { 2317 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2318 } 2319 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2320 } 2321 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) { 2322 /* XXX might be better to explicitly pass the level zero information */ 2323 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2324 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F); 2325 } 2326 2327 for (i = 0; i < num_derivs; i++) { 2328 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2329 } 2330 for (i = num_derivs; i < 5; i++) { 2331 coords[i] = bld->bld_base.base.undef; 2332 } 2333 2334 /* Layer coord always goes into 3rd slot, except for cube map arrays */ 2335 if (layer_coord) { 2336 if (layer_coord == 3) 2337 coords[3] = 
lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2338 else 2339 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2340 } 2341 /* Shadow coord occupies always 5th slot. */ 2342 if (compare) { 2343 sample_key |= LP_SAMPLER_SHADOW; 2344 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0); 2345 } 2346 2347 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 2348 unsigned dim; 2349 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT; 2350 for (dim = 0; dim < num_derivs; ++dim) { 2351 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim); 2352 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim); 2353 } 2354 params.derivs = &derivs; 2355 /* 2356 * could also check all src regs if constant but I doubt such 2357 * cases exist in practice. 2358 */ 2359 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) { 2360 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { 2361 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2362 } 2363 else { 2364 lod_property = LP_SAMPLER_LOD_PER_QUAD; 2365 } 2366 } 2367 else { 2368 lod_property = LP_SAMPLER_LOD_PER_ELEMENT; 2369 } 2370 } 2371 2372 /* some advanced gather instructions (txgo) would require 4 offsets */ 2373 if (inst->Texture.NumOffsets == 1) { 2374 unsigned dim; 2375 sample_key |= LP_SAMPLER_OFFSETS; 2376 for (dim = 0; dim < num_offsets; dim++) { 2377 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2378 } 2379 } 2380 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2381 2382 params.type = bld->bld_base.base.type; 2383 params.sample_key = sample_key; 2384 params.texture_index = texture_unit; 2385 params.sampler_index = sampler_unit; 2386 params.context_ptr = bld->context_ptr; 2387 params.thread_data_ptr = bld->thread_data_ptr; 2388 params.coords = coords; 2389 params.offsets = offsets; 2390 params.lod = lod; 2391 params.texel = texel; 2392 2393 bld->sampler->emit_tex_sample(bld->sampler, 2394 
bld->bld_base.base.gallivm, 2395 ¶ms); 2396 2397 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || 2398 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || 2399 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || 2400 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) { 2401 unsigned char swizzles[4]; 2402 swizzles[0] = inst->Src[1].Register.SwizzleX; 2403 swizzles[1] = inst->Src[1].Register.SwizzleY; 2404 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2405 swizzles[3] = inst->Src[1].Register.SwizzleW; 2406 2407 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); 2408 } 2409 } 2410 2411 static void 2412 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld, 2413 const struct tgsi_full_instruction *inst, 2414 LLVMValueRef *texel, 2415 boolean is_samplei) 2416 { 2417 unsigned unit, target; 2418 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); 2419 LLVMValueRef explicit_lod = NULL; 2420 LLVMValueRef coords[5]; 2421 LLVMValueRef offsets[3] = { NULL }; 2422 struct lp_sampler_params params; 2423 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR; 2424 unsigned dims, i; 2425 unsigned layer_coord = 0; 2426 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT; 2427 2428 memset(¶ms, 0, sizeof(params)); 2429 2430 if (!bld->sampler) { 2431 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 2432 for (i = 0; i < 4; i++) { 2433 texel[i] = coord_undef; 2434 } 2435 return; 2436 } 2437 2438 unit = inst->Src[1].Register.Index; 2439 2440 if (is_samplei) { 2441 target = bld->sv[unit].Resource; 2442 } 2443 else { 2444 target = inst->Texture.Texture; 2445 } 2446 2447 switch (target) { 2448 case TGSI_TEXTURE_1D: 2449 case TGSI_TEXTURE_BUFFER: 2450 dims = 1; 2451 break; 2452 case TGSI_TEXTURE_1D_ARRAY: 2453 layer_coord = 1; 2454 dims = 1; 2455 break; 2456 case TGSI_TEXTURE_2D: 2457 case TGSI_TEXTURE_RECT: 2458 case TGSI_TEXTURE_2D_MSAA: 2459 dims = 2; 2460 break; 2461 case 
TGSI_TEXTURE_2D_ARRAY: 2462 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2463 layer_coord = 2; 2464 dims = 2; 2465 break; 2466 case TGSI_TEXTURE_3D: 2467 dims = 3; 2468 break; 2469 default: 2470 assert(0); 2471 return; 2472 } 2473 2474 /* always have lod except for buffers and msaa targets ? */ 2475 if (target != TGSI_TEXTURE_BUFFER && 2476 target != TGSI_TEXTURE_2D_MSAA && 2477 target != TGSI_TEXTURE_2D_ARRAY_MSAA) { 2478 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT; 2479 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3); 2480 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2481 } 2482 /* 2483 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms) 2484 * would be the sample index. 2485 */ 2486 2487 for (i = 0; i < dims; i++) { 2488 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i); 2489 } 2490 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */ 2491 for (i = dims; i < 5; i++) { 2492 coords[i] = coord_undef; 2493 } 2494 if (layer_coord) 2495 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord); 2496 2497 if (inst->Texture.NumOffsets == 1) { 2498 unsigned dim; 2499 sample_key |= LP_SAMPLER_OFFSETS; 2500 for (dim = 0; dim < dims; dim++) { 2501 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim); 2502 } 2503 } 2504 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT; 2505 2506 params.type = bld->bld_base.base.type; 2507 params.sample_key = sample_key; 2508 params.texture_index = unit; 2509 /* 2510 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS 2511 * and trigger some assertions with d3d10 where the sampler view number 2512 * can exceed this. 
2513 */ 2514 params.sampler_index = 0; 2515 params.context_ptr = bld->context_ptr; 2516 params.thread_data_ptr = bld->thread_data_ptr; 2517 params.coords = coords; 2518 params.offsets = offsets; 2519 params.derivs = NULL; 2520 params.lod = explicit_lod; 2521 params.texel = texel; 2522 2523 bld->sampler->emit_tex_sample(bld->sampler, 2524 bld->bld_base.base.gallivm, 2525 ¶ms); 2526 2527 if (is_samplei && 2528 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X || 2529 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y || 2530 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z || 2531 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) { 2532 unsigned char swizzles[4]; 2533 swizzles[0] = inst->Src[1].Register.SwizzleX; 2534 swizzles[1] = inst->Src[1].Register.SwizzleY; 2535 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2536 swizzles[3] = inst->Src[1].Register.SwizzleW; 2537 2538 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles); 2539 } 2540 } 2541 2542 static void 2543 emit_size_query( struct lp_build_tgsi_soa_context *bld, 2544 const struct tgsi_full_instruction *inst, 2545 LLVMValueRef *sizes_out, 2546 boolean is_sviewinfo) 2547 { 2548 LLVMValueRef explicit_lod; 2549 enum lp_sampler_lod_property lod_property; 2550 unsigned has_lod; 2551 unsigned i; 2552 unsigned unit = inst->Src[1].Register.Index; 2553 unsigned target, pipe_target; 2554 struct lp_sampler_size_query_params params; 2555 2556 if (is_sviewinfo) { 2557 target = bld->sv[unit].Resource; 2558 } 2559 else { 2560 target = inst->Texture.Texture; 2561 } 2562 switch (target) { 2563 case TGSI_TEXTURE_BUFFER: 2564 case TGSI_TEXTURE_RECT: 2565 case TGSI_TEXTURE_SHADOWRECT: 2566 has_lod = 0; 2567 break; 2568 default: 2569 has_lod = 1; 2570 break; 2571 } 2572 2573 if (!bld->sampler) { 2574 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); 2575 for (i = 0; i < 4; i++) 2576 sizes_out[i] = bld->bld_base.int_bld.undef; 2577 return; 2578 } 2579 2580 if (has_lod) { 
2581 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0); 2582 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); 2583 } 2584 else { 2585 explicit_lod = NULL; 2586 lod_property = LP_SAMPLER_LOD_SCALAR; 2587 } 2588 2589 2590 pipe_target = tgsi_to_pipe_tex_target(target); 2591 2592 params.int_type = bld->bld_base.int_bld.type; 2593 params.texture_unit = unit; 2594 params.target = pipe_target; 2595 params.context_ptr = bld->context_ptr; 2596 params.is_sviewinfo = TRUE; 2597 params.lod_property = lod_property; 2598 params.explicit_lod = explicit_lod; 2599 params.sizes_out = sizes_out; 2600 2601 bld->sampler->emit_size_query(bld->sampler, 2602 bld->bld_base.base.gallivm, 2603 ¶ms); 2604 } 2605 2606 static boolean 2607 near_end_of_shader(struct lp_build_tgsi_soa_context *bld, 2608 int pc) 2609 { 2610 unsigned i; 2611 2612 for (i = 0; i < 5; i++) { 2613 unsigned opcode; 2614 2615 if (pc + i >= bld->bld_base.info->num_instructions) 2616 return TRUE; 2617 2618 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; 2619 2620 if (opcode == TGSI_OPCODE_END) 2621 return TRUE; 2622 2623 if (opcode == TGSI_OPCODE_TEX || 2624 opcode == TGSI_OPCODE_TXP || 2625 opcode == TGSI_OPCODE_TXD || 2626 opcode == TGSI_OPCODE_TXB || 2627 opcode == TGSI_OPCODE_TXL || 2628 opcode == TGSI_OPCODE_TXF || 2629 opcode == TGSI_OPCODE_TXQ || 2630 opcode == TGSI_OPCODE_TEX2 || 2631 opcode == TGSI_OPCODE_TXB2 || 2632 opcode == TGSI_OPCODE_TXL2 || 2633 opcode == TGSI_OPCODE_SAMPLE || 2634 opcode == TGSI_OPCODE_SAMPLE_B || 2635 opcode == TGSI_OPCODE_SAMPLE_C || 2636 opcode == TGSI_OPCODE_SAMPLE_C_LZ || 2637 opcode == TGSI_OPCODE_SAMPLE_D || 2638 opcode == TGSI_OPCODE_SAMPLE_I || 2639 opcode == TGSI_OPCODE_SAMPLE_I_MS || 2640 opcode == TGSI_OPCODE_SAMPLE_L || 2641 opcode == TGSI_OPCODE_SVIEWINFO || 2642 opcode == TGSI_OPCODE_CAL || 2643 opcode == TGSI_OPCODE_IF || 2644 opcode == TGSI_OPCODE_UIF || 2645 opcode == TGSI_OPCODE_BGNLOOP || 2646 opcode == TGSI_OPCODE_SWITCH) 2647 
return FALSE; 2648 } 2649 2650 return TRUE; 2651 } 2652 2653 2654 2655 /** 2656 * Kill fragment if any of the src register values are negative. 2657 */ 2658 static void 2659 emit_kill_if( 2660 struct lp_build_tgsi_soa_context *bld, 2661 const struct tgsi_full_instruction *inst, 2662 int pc) 2663 { 2664 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 2665 const struct tgsi_full_src_register *reg = &inst->Src[0]; 2666 LLVMValueRef terms[TGSI_NUM_CHANNELS]; 2667 LLVMValueRef mask; 2668 unsigned chan_index; 2669 2670 memset(&terms, 0, sizeof terms); 2671 2672 TGSI_FOR_EACH_CHANNEL( chan_index ) { 2673 unsigned swizzle; 2674 2675 /* Unswizzle channel */ 2676 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 2677 2678 /* Check if the component has not been already tested. */ 2679 assert(swizzle < TGSI_NUM_CHANNELS); 2680 if( !terms[swizzle] ) 2681 /* TODO: change the comparison operator instead of setting the sign */ 2682 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); 2683 } 2684 2685 mask = NULL; 2686 TGSI_FOR_EACH_CHANNEL( chan_index ) { 2687 if(terms[chan_index]) { 2688 LLVMValueRef chan_mask; 2689 2690 /* 2691 * If term < 0 then mask = 0 else mask = ~0. 2692 */ 2693 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); 2694 2695 if(mask) 2696 mask = LLVMBuildAnd(builder, mask, chan_mask, ""); 2697 else 2698 mask = chan_mask; 2699 } 2700 } 2701 2702 if (bld->exec_mask.has_mask) { 2703 LLVMValueRef invmask; 2704 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); 2705 mask = LLVMBuildOr(builder, mask, invmask, ""); 2706 } 2707 2708 lp_build_mask_update(bld->mask, mask); 2709 if (!near_end_of_shader(bld, pc)) 2710 lp_build_mask_check(bld->mask); 2711 } 2712 2713 2714 /** 2715 * Unconditional fragment kill. 2716 * The only predication is the execution mask which will apply if 2717 * we're inside a loop or conditional. 
 */
static void
emit_kill(struct lp_build_tgsi_soa_context *bld,
          int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      /* No exec mask: kill all lanes. */
      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
 */
static void
emit_dump_file(struct lp_build_tgsi_soa_context *bld,
               unsigned file)
{
   const struct tgsi_shader_info *info = bld->bld_base.info;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef reg_ptr;
   int index;
   int max_index = info->file_max[file];

   /*
    * Some register files, particularly constants, can be very large,
    * and dumping everything could make this unusably slow.
    */
   max_index = MIN2(max_index, 32);

   for (index = 0; index <= max_index; index++) {
      LLVMValueRef res;
      unsigned mask;
      int chan;

      /* file_mask only covers the first 8*sizeof(unsigned) registers. */
      if (index < 8 * sizeof(unsigned) &&
          (info->file_mask[file] & (1u << index)) == 0) {
         /* This was not declared.*/
         continue;
      }

      if (file == TGSI_FILE_INPUT) {
         mask = info->input_usage_mask[index];
      } else {
         mask = TGSI_WRITEMASK_XYZW;
      }

      for (chan = 0; chan < 4; chan++) {
         if ((mask & (1 << chan)) == 0) {
            /* This channel is not used.*/
            continue;
         }

         if (file == TGSI_FILE_CONSTANT) {
            /* Fetch constants through the generic fetch callback. */
            struct tgsi_full_src_register reg;
            memset(&reg, 0, sizeof reg);
            reg.Register.File = file;
            reg.Register.Index = index;
            reg.Register.SwizzleX = 0;
            reg.Register.SwizzleY = 1;
            reg.Register.SwizzleZ = 2;
            reg.Register.SwizzleW = 3;

            res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_INPUT) {
            res = bld->inputs[index][chan];
            if (!res) {
               continue;
            }
         } else if (file == TGSI_FILE_TEMPORARY) {
            reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else if (file == TGSI_FILE_OUTPUT) {
            reg_ptr = lp_get_output_ptr(bld, index, chan);
            assert(reg_ptr);
            res = LLVMBuildLoad(builder, reg_ptr, "");
         } else {
            assert(0);
            continue;
         }

         emit_dump_reg(gallivm, file, index, chan, res);
      }
   }
}



/**
 * Handle a TGSI declaration: allocate storage (allocas) for temporaries,
 * outputs and address registers, record sampler view targets, and fetch
 * per-buffer constant pointers.
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* Indirectly-addressed temps live in an array allocated elsewhere. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
   break;

   default:
      /* don't need to declare other vars */
      break;
   }
}


/**
 * Handle a TGSI immediate: build constant vectors for each channel and
 * store them either in the inline immediates[] array or (for indirectly
 * addressed immediates) in the imms_array alloca.
 */
void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMValueRef imms[4];
   unsigned i;
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         imms[i] =
            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);

      break;
   case TGSI_IMM_FLOAT64:
   case TGSI_IMM_UINT64:
   case TGSI_IMM_INT64:
   case TGSI_IMM_UINT32:
      /* 64-bit immediates are stored as their 32-bit halves here —
       * presumably reassembled at fetch time; verify against the
       * corresponding fetch path. */
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   }
   /* Pad unused channels with undef. */
   for( i = size; i < 4; ++i )
      imms[i] = bld_base->base.undef;

   if (bld->use_immediates_array) {
      unsigned index = bld->num_immediates;
      struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
      LLVMBuilderRef builder = gallivm->builder;

      assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
      for (i = 0; i < 4; ++i ) {
         LLVMValueRef lindex = lp_build_const_int32(
                  bld->bld_base.base.gallivm, index * 4 + i);
         LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                             bld->imms_array, &lindex, 1, "");
         LLVMBuildStore(builder, imms[i], imm_ptr);
      }
   } else {
      /* simply copy the immediate values into the next immediates[] slot */
      unsigned i;
      assert(imm->Immediate.NrTokens - 1 <= 4);
      assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);

      for(i = 0; i < 4; ++i )
         bld->immediates[bld->num_immediates][i] = imms[i];

      if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
         /* Mirror into the array as well so indirect access works. */
         unsigned index = bld->num_immediates;
         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
         LLVMBuilderRef builder = gallivm->builder;
         for (i = 0; i < 4; ++i ) {
            LLVMValueRef lindex = lp_build_const_int32(
                     bld->bld_base.base.gallivm, index * 4 + i);
            LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
                                                bld->imms_array, &lindex, 1, "");
            LLVMBuildStore(builder,
                           bld->immediates[index][i],
                           imm_ptr);
         }
      }
   }

   bld->num_immediates++;
}

/* Action callback for DDX: derivative along x only. */
static void
ddx_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL,
                    &emit_data->output[emit_data->chan], NULL);
}

/* Action callback for DDY: derivative along y only. */
static void
ddy_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
                    &emit_data->output[emit_data->chan]);
}

/* Action callback for KILL (unconditional). */
static void
kill_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill(bld, bld_base->pc - 1);
}

/* Action callback for KILL_IF (conditional on src signs). */
static void
kill_if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
}

/* TEX: sampler unit in src1. */
static void
tex_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

/* TEX2: sampler unit in src2. */
static void
tex2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

/* TXB: lod bias, sampler unit in src1. */
static void
txb_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

/* TXB2: lod bias, sampler unit in src2. */
static void
txb2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

/* TXD: explicit derivatives, sampler unit in src3. */
static void
txd_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
            emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
}

/* TXL: explicit lod, sampler unit in src1. */
static void
txl_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

/* TXL2: explicit lod, sampler unit in src2. */
static void
txl2_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
}

/* TXP: projected coords, sampler unit in src1. */
static void
txp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
            emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
}

/* TG4: texture gather, sampler unit in src2. */
static void
tg4_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 2, LP_SAMPLER_OP_GATHER);
}

/* LODQ: lod query, sampler unit in src1. */
static void
lodq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
            emit_data->output, 1, LP_SAMPLER_OP_LODQ);
}

/* TXQ: size query using the instruction's texture target. */
static void
txq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
}

/* TXF: texel fetch using the instruction's texture target. */
static void
txf_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
}

/* SAMPLE_I: texel fetch using the declared sampler view's target. */
static void
sample_i_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
}

/* SAMPLE: plain filtered sample. */
static void
sample_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_sample(bld, emit_data->inst,
LP_BLD_TEX_MODIFIER_NONE, 3201 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3202 } 3203 3204 static void 3205 sample_b_emit( 3206 const struct lp_build_tgsi_action * action, 3207 struct lp_build_tgsi_context * bld_base, 3208 struct lp_build_emit_data * emit_data) 3209 { 3210 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3211 3212 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, 3213 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3214 } 3215 3216 static void 3217 sample_c_emit( 3218 const struct lp_build_tgsi_action * action, 3219 struct lp_build_tgsi_context * bld_base, 3220 struct lp_build_emit_data * emit_data) 3221 { 3222 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3223 3224 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3225 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3226 } 3227 3228 static void 3229 sample_c_lz_emit( 3230 const struct lp_build_tgsi_action * action, 3231 struct lp_build_tgsi_context * bld_base, 3232 struct lp_build_emit_data * emit_data) 3233 { 3234 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3235 3236 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, 3237 TRUE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3238 } 3239 3240 static void 3241 sample_d_emit( 3242 const struct lp_build_tgsi_action * action, 3243 struct lp_build_tgsi_context * bld_base, 3244 struct lp_build_emit_data * emit_data) 3245 { 3246 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3247 3248 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 3249 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3250 } 3251 3252 static void 3253 sample_l_emit( 3254 const struct lp_build_tgsi_action * action, 3255 struct lp_build_tgsi_context * bld_base, 3256 struct lp_build_emit_data * emit_data) 3257 { 3258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3259 3260 emit_sample(bld, emit_data->inst, 
LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 3261 FALSE, LP_SAMPLER_OP_TEXTURE, emit_data->output); 3262 } 3263 3264 static void 3265 gather4_emit( 3266 const struct lp_build_tgsi_action * action, 3267 struct lp_build_tgsi_context * bld_base, 3268 struct lp_build_emit_data * emit_data) 3269 { 3270 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3271 3272 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3273 FALSE, LP_SAMPLER_OP_GATHER, emit_data->output); 3274 } 3275 3276 static void 3277 sviewinfo_emit( 3278 const struct lp_build_tgsi_action * action, 3279 struct lp_build_tgsi_context * bld_base, 3280 struct lp_build_emit_data * emit_data) 3281 { 3282 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3283 3284 emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); 3285 } 3286 3287 static void 3288 lod_emit( 3289 const struct lp_build_tgsi_action * action, 3290 struct lp_build_tgsi_context * bld_base, 3291 struct lp_build_emit_data * emit_data) 3292 { 3293 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3294 3295 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, 3296 FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); 3297 } 3298 3299 static LLVMValueRef 3300 mask_vec(struct lp_build_tgsi_context *bld_base) 3301 { 3302 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); 3303 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 3304 struct lp_exec_mask *exec_mask = &bld->exec_mask; 3305 3306 if (!exec_mask->has_mask) { 3307 return lp_build_mask_value(bld->mask); 3308 } 3309 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask), 3310 exec_mask->exec_mask, ""); 3311 } 3312 3313 static void 3314 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base, 3315 LLVMValueRef ptr, 3316 LLVMValueRef mask) 3317 { 3318 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 3319 LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); 3320 3321 current_vec = 
LLVMBuildSub(builder, current_vec, mask, "");
   /* Active mask lanes are all-ones (== -1 as an integer), so subtracting
    * the mask adds one to the counter in every active lane. */

   LLVMBuildStore(builder, current_vec, ptr);
}

/*
 * Zero, in the lanes selected by 'mask', the counter vector stored at 'ptr'.
 */
static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
                             LLVMValueRef ptr,
                             LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");

   current_vec = lp_build_select(&bld_base->uint_bld,
                                 mask,
                                 bld_base->uint_bld.zero,
                                 current_vec);

   LLVMBuildStore(builder, current_vec, ptr);
}

/*
 * Disable the lanes that have already emitted the declared maximum
 * number of output vertices.
 */
static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
                                  LLVMValueRef current_mask_vec,
                                  LLVMValueRef total_emitted_vertices_vec)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *int_bld = &bld->bld_base.int_bld;
   LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
                                        total_emitted_vertices_vec,
                                        bld->max_output_vertices_vec);

   return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
}

/*
 * EMIT action for geometry shaders: gather the current outputs, pass them
 * to the GS interface and bump the per-lane vertex counters.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* Ignore lanes that already produced the maximum vertex count. */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}


/*
 * Flush the current primitive under an explicit mask: notify the GS
 * interface, bump the primitive counter and reset the per-primitive
 * vertex counter -- but only in lanes that actually have unflushed
 * vertices.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}

/* ENDPRIM action: end the primitive under the current execution mask. */
static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef mask = mask_vec(bld_base);
      end_primitive_masked(bld_base, mask);
   }
}

/* CAL: call the subroutine at the instruction's label. */
static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

/* RET: return from the current subroutine. */
static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

/* BRK: break out of the innermost loop/switch. */
static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask, bld_base);
}

/* IF: push a float "!= 0.0" condition onto the execution-mask stack. */
static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp =
lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

/* UIF: like IF but the condition is compared as an unsigned integer. */
static void
uif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], uint_bld->zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

/* CASE: open a case label inside the current switch. */
static void
case_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_case(&bld->exec_mask, emit_data->args[0]);
}

/* DEFAULT: open the default label inside the current switch. */
static void
default_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_default(&bld->exec_mask, bld_base);
}

/* SWITCH: begin a switch statement on the given value. */
static void
switch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
}

/* ENDSWITCH: close the current switch statement. */
static void
endswitch_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endswitch(&bld->exec_mask, bld_base);
}

/* BGNLOOP: begin a loop. */
static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

/* BGNSUB: begin a subroutine body. */
static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

/* ELSE: invert the condition on top of the execution-mask stack. */
static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

/* ENDIF: pop the condition off the execution-mask stack. */
static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

/* ENDLOOP: close the current loop. */
static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

/* ENDSUB: close the current subroutine body. */
static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

/* CONT: continue with the next loop iteration. */
static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context *
bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

/*
 * Shader prologue: allocate the alloca-backed arrays for indirectly
 * addressed files, copy inputs into their array when needed, and
 * zero-initialize the geometry-shader emit counters.
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* file_max is the highest register index, hence "* 4 + 4" channels. */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            /* Some channels may be unset; only store defined values. */
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}

/*
 * Shader epilogue: for geometry shaders, flush any unfinished primitive
 * and hand the final counters to the GS interface; otherwise copy the
 * (possibly array-backed) outputs to the caller-provided slots.
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller.
    * NOTE(review): the above sentence appears to describe the else branch
    * (gather_outputs) rather than the gs_iface path. */
   if (bld->gs_iface)
   {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      gather_outputs(bld);
   }
}

/**
 * Translate a TGSI shader into LLVM IR, operating on SoA vectors.
 *
 * \param gallivm          LLVM context/builder state
 * \param tokens           the TGSI shader to translate
 * \param type             vector type for the main (float) build context
 * \param mask             fragment/primitive coverage mask
 * \param consts_ptr       pointer to the constant buffers
 * \param const_sizes_ptr  per-buffer constant sizes
 * \param system_values    pre-computed system values
 * \param inputs           input channel values (NULL-able per channel)
 * \param outputs          where to store the output channel values
 * \param context_ptr      jit context pointer
 * \param thread_data_ptr  per-thread data pointer
 * \param sampler          texture sampling code generator
 * \param info             shader info from tgsi_scan
 * \param gs_iface         geometry shader interface, or NULL
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  LLVMValueRef const_sizes_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* NOTE(review): res_type is initialized here but not referenced in the
    * visible body of this function -- possibly vestigial. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct
lp_type dbl_type;
      dbl_type = type;
      /* Double-width (e.g. 64-bit float) build context. */
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.const_sizes_ptr = const_sizes_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reasons immediates are always backed by a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;


   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   /* Run the actual TGSI -> LLVM IR translation. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Debugging aid: flip to 1 to dump the TGSI and generated function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Debugging aid: flip to 1 to dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}