/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca (at) vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"


static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMBuilderRef builder = bld->gallivm->builder;

   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->cond_stack_size = 0;
   mask->loop_stack_size = 0;
   mask->call_stack_size = 0;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = mask->cond_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");

   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      mask->loop_limiter);
}

static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
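      /* Inside a loop the combined mask is cond_mask & cont_mask & break_mask;
       * ret_mask is folded in further below when we are inside a subroutine.
       */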
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->call_stack_size) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0);
}

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
   if (mask->cond_stack_size == 0) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(mask->cond_stack_size);
   prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
   if (mask->cond_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   assert(mask->cond_stack_size);
   mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
   lp_exec_mask_update(mask);
}

static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}

static void lp_exec_break(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "break");

   mask->break_mask = LLVMBuildAnd(builder,
                                   mask->break_mask,
                                   exec_mask, "break_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
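   /* CONT clears cont_mask for the currently active channels; lp_exec_endloop()
    * restores cont_mask from the enclosing scope before the loop back-edge, so
    * the effect only lasts until the end of the current iteration.
    */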
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, mask->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;

   lp_exec_mask_update(mask);
}

/* stores val into an address pointed to by dst.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef real_val, dst_val;

      dst_val = LLVMBuildLoad(builder, dst, "");
      real_val = lp_build_select(bld_store,
                                 pred,
                                 val, dst_val);

      LLVMBuildStore(builder, real_val, dst);
   } else
      LLVMBuildStore(builder, val, dst);
}

static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
   mask->call_stack[mask->call_stack_size].pc = *pc;
   mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
   mask->call_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask;

   if (mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   assert(mask->call_stack_size);
   mask->call_stack_size--;
   *pc = mask->call_stack[mask->call_stack_size].pc;
   mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
   lp_exec_mask_update(mask);
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
   }
   else {
      return bld->temps[index][chan];
   }
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   assert(chan < 4);
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
                                                 index * 4 + chan);
      return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
   }
   else {
      return bld->outputs[index][chan];
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
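 * Conceptually, res[i] = base_ptr[indexes[i]] for each vector lane i.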
 */
static LLVMValueRef
build_gather(struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef res = bld->undef;
   unsigned i;

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                             &index, 1, "gather_ptr");
      LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
   }

   return res;
}


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
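 * (E.g. for CONST[ADDR.x + n] the per-lane offset is n + ADDR.x, clamped
 * to the file's file_max.)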
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_src_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->SwizzleX;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   rel = LLVMBuildLoad(builder,
                       bld->addr[indirect_reg->Index][swizzle],
                       "load addr reg");

   index = lp_build_add(uint_bld, base, rel);

   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                      uint_bld->type,
                                      bld->bld_base.info->file_max[reg_file]);

   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_VOID:
   case TGSI_TYPE_DOUBLE:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      return build_gather(bld_fetch, bld->consts_ptr, index_vec);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr,
                                &index, 1, "");

      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
         LLVMTypeRef ivtype = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
         LLVMValueRef temp_ptr;
         temp_ptr = LLVMBuildBitCast(builder, scalar_ptr, ivtype, "");
         scalar = LLVMBuildLoad(builder, temp_ptr, "");
      } else
         scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      return lp_build_broadcast_scalar(bld_fetch, scalar);
   }
}

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMValueRef res = bld->immediates[reg->Register.Index][swizzle];
   assert(res);

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMConstBitCast(res, bld_base->uint_bld.vec_type);
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMConstBitCast(res, bld_base->int_bld.vec_type);
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef length_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
      LLVMValueRef index_vec;  /* index into the input array */
      LLVMValueRef inputs_array;
      LLVMTypeRef float4_ptr_type;

      /* index_vec = (indirect_index * 4 + swizzle) * length */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

      /* cast inputs_array pointer to float* */
      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                      float4_ptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(&bld_base->base, inputs_array, index_vec);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, input_ptr, "");
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}

static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
      LLVMValueRef length_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
                                bld->bld_base.base.type.length);
      LLVMValueRef index_vec;  /* index into the temp register array */
      LLVMValueRef temps_array;
      LLVMTypeRef float4_ptr_type;

      /* index_vec = (indirect_index * 4 + swizzle) * length */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

      /* cast temps_array pointer to float* */
      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                     float4_ptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(&bld_base->base, temps_array, index_vec);
   }
   else {
      LLVMValueRef temp_ptr;
      if (stype != TGSI_TYPE_FLOAT && stype != TGSI_TYPE_UNTYPED) {
         LLVMTypeRef itype = LLVMPointerType(bld->bld_base.int_bld.vec_type, 0);
         LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
                                                     swizzle);
         temp_ptr = LLVMBuildBitCast(builder, tint_ptr, itype, "");
      } else
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");
      if (!res)
         return bld->bld_base.base.undef;
   }

   return res;
}

static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}

/**
 * Register fetch with derivatives.
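 * The ddx/ddy values come from lp_build_ddx()/lp_build_ddy(), i.e. per-quad
 * finite differences of the source value.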
 */
static void
emit_fetch_deriv(
   struct lp_build_tgsi_soa_context *bld,
   LLVMValueRef src,
   LLVMValueRef *res,
   LLVMValueRef *ddx,
   LLVMValueRef *ddy)
{
   if (res)
      *res = src;

   /* TODO: use interpolation coeffs for inputs */

   if (ddx)
      *ddx = lp_build_ddx(&bld->bld_base.base, src);

   if (ddy)
      *ddy = lp_build_ddy(&bld->bld_base.base, src);
}


/**
 * Predicate.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
          * is needlessly causing two comparisons due to storing the
          * intermediate result as a float vector instead of an integer mask
          * vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}

/**
 * Register store.
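 * Applies the instruction's saturate modifier and predicates the store with
 * the current execution mask.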
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   struct lp_build_context *bld_store;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

   switch (dtype) {
   default:
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_store = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_store = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_store = &bld_base->int_bld;
      break;
   case TGSI_TYPE_DOUBLE:
   case TGSI_TYPE_VOID:
      assert(0);
      bld_store = NULL;
      break;
   }

   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero);
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      value = lp_build_max(&bld->bld_base.base, value,
                           lp_build_const_vec(bld->bld_base.base.gallivm,
                                              bld->bld_base.base.type, -1.0));
      value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one);
      break;

   default:
      assert(0);
   }

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
             bld->bld_base.info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
         LLVMValueRef index_vec;  /* indexes into the output registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->bld_base.base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into output registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);
         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      if (reg->Register.Indirect) {
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   bld->bld_base.base.type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < bld->bld_base.base.type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr;

         switch (dtype) {
         case TGSI_TYPE_UNSIGNED:
         case TGSI_TYPE_SIGNED: {
            LLVMTypeRef itype = bld_base->int_bld.vec_type;
            LLVMTypeRef ivtype = LLVMPointerType(itype, 0);
            LLVMValueRef tint_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
                                                        chan_index);
            LLVMValueRef temp_value_ptr;

            temp_ptr = LLVMBuildBitCast(builder, tint_ptr, ivtype, "");
            temp_value_ptr = LLVMBuildBitCast(builder, value, itype, "");
            value = temp_value_ptr;
            break;
         }
         default:
         case TGSI_TYPE_FLOAT:
         case TGSI_TYPE_UNTYPED:
            temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
                                           chan_index);
            break;
         }

         lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == bld_base->base.int_vec_type);
      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      lp_exec_mask_store(&bld->exec_mask, bld_store, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }
}

static void
emit_store(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_instruction * inst,
   const struct tgsi_opcode_info * info,
   LLVMValueRef dst[4])
{
   unsigned chan_index;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (info->num_dst) {
      LLVMValueRef pred[TGSI_NUM_CHANNELS];

      emit_fetch_predicate( bld, inst, pred );

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
         emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
      }
   }
}

/**
 * High-level instruction translators.
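 * (texture sampling, texture size queries, fragment kill, etc.)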
 */

static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[4];
   struct lp_derivatives derivs;
   unsigned num_coords;
   unsigned dims;
   unsigned i;

   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   derivs.ddx_ddy[0] = bld->bld_base.base.undef;
   derivs.ddx_ddy[1] = bld->bld_base.base.undef;

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      num_coords = 2;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      num_coords = 3;
      dims = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      num_coords = 3;
      dims = 3;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      num_coords = 4;
      dims = 2;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   for (i = num_coords; i < 3; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef ddxdyonec[3];
      unsigned length = bld->bld_base.base.type.length;
      unsigned num_quads = length / 4;
      unsigned dim;
      unsigned quad;

      for (dim = 0; dim < dims; ++dim) {
         LLVMValueRef srcx = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
         LLVMValueRef srcy = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
         for (quad = 0; quad < num_quads; ++quad) {
            unsigned s1 = 4*quad;
            unsigned s2 = 4*quad + length;
            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s2);
            shuffles[4*quad + 2] = i32undef;
            shuffles[4*quad + 3] = i32undef;
         }
         ddxdyonec[dim] = LLVMBuildShuffleVector(builder, srcx, srcy,
                                                 LLVMConstVector(shuffles, length), "");
      }
      if (dims == 1) {
         derivs.ddx_ddy[0] = ddxdyonec[0];
      }
      else if (dims >= 2) {
         for (quad = 0; quad < num_quads; ++quad) {
            unsigned s1 = 4*quad;
            unsigned s2 = 4*quad + length;
            shuffles[4*quad + 0] = lp_build_const_int32(gallivm, s1);
            shuffles[4*quad + 1] = lp_build_const_int32(gallivm, s1 + 1);
            shuffles[4*quad + 2] = lp_build_const_int32(gallivm, s2);
            shuffles[4*quad + 3] = lp_build_const_int32(gallivm, s2 + 1);
         }
         derivs.ddx_ddy[0] = LLVMBuildShuffleVector(builder, ddxdyonec[0], ddxdyonec[1],
                                                    LLVMConstVector(shuffles, length), "");
         if (dims == 3) {
            derivs.ddx_ddy[1] = ddxdyonec[2];
         }
      }
      unit = inst->Src[3].Register.Index;
   } else {
      if (dims == 1) {
         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[0]);
      }
      else if (dims >= 2) {
         derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->bld_base.base,
                                                              coords[0], coords[1]);
         if (dims == 3) {
            derivs.ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(&bld->bld_base.base, coords[2]);
         }
      }
      unit = inst->Src[1].Register.Index;
   }

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  unit, num_coords, coords,
                                  &derivs,
                                  lod_bias, explicit_lod,
                                  texel);
}

static void
emit_txq( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          LLVMValueRef *sizes_out)
{
   LLVMValueRef explicit_lod;
   unsigned num_coords, has_lod;
   unsigned i;

   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWCUBE:
      num_coords = 1;
      has_lod = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_CUBE:
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      num_coords = 2;
      has_lod = 1;
      break;
   case TGSI_TEXTURE_3D:
// case TGSI_TEXTURE_CUBE_ARRAY:
// case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      num_coords = 3;
      has_lod = 1;
      break;

   case TGSI_TEXTURE_BUFFER:
      num_coords = 1;
      has_lod = 0;
      break;

   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_SHADOWRECT:
// case TGSI_TEXTURE_2D_MS:
      num_coords = 2;
      has_lod = 0;
      break;

// case TGSI_TEXTURE_2D_MS_ARRAY:
//    num_coords = 3;
//    has_lod = 0;
//    break;

   default:
      assert(0);
      return;
   }

   if (!bld->sampler) {
      _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
      for (i = 0; i < num_coords; i++)
         sizes_out[i] = bld->bld_base.base.undef;
      return;
   }

   if (has_lod)
      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 2 );
   else
      explicit_lod = NULL;

   bld->sampler->emit_size_query(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 bld->bld_base.int_bld.type,
                                 inst->Src[1].Register.Index,
                                 explicit_lod,
                                 sizes_out);
}

static boolean
near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
                   int pc)
{
   int i;

   for (i = 0; i < 5; i++) {
      unsigned opcode;

      if (pc + i >= bld->bld_base.info->num_instructions)
         return TRUE;

      opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;

      if (opcode == TGSI_OPCODE_END)
         return TRUE;

      if (opcode == TGSI_OPCODE_TEX ||
          opcode == TGSI_OPCODE_TXP ||
          opcode == TGSI_OPCODE_TXD ||
          opcode == TGSI_OPCODE_TXB ||
          opcode == TGSI_OPCODE_TXL ||
          opcode == TGSI_OPCODE_TXF ||
          opcode == TGSI_OPCODE_TXQ ||
          opcode == TGSI_OPCODE_CAL ||
          opcode == TGSI_OPCODE_CALLNZ ||
          opcode == TGSI_OPCODE_IF ||
          opcode == TGSI_OPCODE_IFC ||
          opcode == TGSI_OPCODE_BGNLOOP ||
          opcode == TGSI_OPCODE_SWITCH)
         return FALSE;
   }

   return TRUE;
}



/**
 * Kill fragment if any of the src register values are negative.
 */
static void
emit_kil(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *reg = &inst->Src[0];
   LLVMValueRef terms[TGSI_NUM_CHANNELS];
   LLVMValueRef mask;
   unsigned chan_index;

   memset(&terms, 0, sizeof terms);

   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      unsigned swizzle;

      /* Unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );

      /* Check if the component has not been already tested. */
      assert(swizzle < TGSI_NUM_CHANNELS);
      if( !terms[swizzle] )
         /* TODO: change the comparison operator instead of setting the sign */
         terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
   }

   mask = NULL;
   TGSI_FOR_EACH_CHANNEL( chan_index ) {
      if (terms[chan_index]) {
         LLVMValueRef chan_mask;

         /*
          * If term < 0 then mask = 0 else mask = ~0.
          */
         chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL,
                                  terms[chan_index], bld->bld_base.base.zero);

         if (mask)
            mask = LLVMBuildAnd(builder, mask, chan_mask, "");
         else
            mask = chan_mask;
      }
   }

   if (mask) {
      lp_build_mask_update(bld->mask, mask);

      if (!near_end_of_shader(bld, pc))
         lp_build_mask_check(bld->mask);
   }
}


/**
 * Predicated fragment kill.
 * XXX Actually, we do an unconditional kill (as in tgsi_exec.c).
 * The only predication is the execution mask which will apply if
 * we're inside a loop or conditional.
 */
static void
emit_kilp(struct lp_build_tgsi_soa_context *bld,
          int pc)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   /* For those channels which are "alive", disable fragment shader
    * execution.
    */
   if (bld->exec_mask.has_mask) {
      mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
   }
   else {
      LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
      mask = zero;
   }

   lp_build_mask_update(bld->mask, mask);

   if (!near_end_of_shader(bld, pc))
      lp_build_mask_check(bld->mask);
}


/**
 * Emit code which will dump the value of all the temporary registers
 * to stdout.
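 * (Debug aid; only reachable through the disabled block in emit_epilogue().)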
 */
static void
emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef temp_ptr;
   LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
   LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
   LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
   LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
   int index;
   int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];

   for (index = 0; index < n; index++) {
      LLVMValueRef idx = lp_build_const_int32(gallivm, index);
      LLVMValueRef v[4][4], res;
      int chan;

      lp_build_printf(gallivm, "TEMP[%d]:\n", idx);

      for (chan = 0; chan < 4; chan++) {
         temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
         res = LLVMBuildLoad(builder, temp_ptr, "");
         v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
         v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
         v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
         v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
      }

      lp_build_printf(gallivm, " X: %f %f %f %f\n",
                      v[0][0], v[0][1], v[0][2], v[0][3]);
      lp_build_printf(gallivm, " Y: %f %f %f %f\n",
                      v[1][0], v[1][1], v[1][2], v[1][3]);
      lp_build_printf(gallivm, " Z: %f %f %f %f\n",
                      v[2][0], v[2][1], v[2][2], v[2][3]);
      lp_build_printf(gallivm, " W: %f %f %f %f\n",
                      v[3][0], v[3][1], v[3][2], v[3][3]);
   }
}



void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   for (idx = first; idx <= last; ++idx) {
      assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
      switch (decl->Declaration.File) {
      case TGSI_FILE_TEMPORARY:
         assert(idx < LP_MAX_TGSI_TEMPS);
         if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
         break;

      case TGSI_FILE_OUTPUT:
         if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
         break;

      case TGSI_FILE_ADDRESS:
         /* ADDR registers are the only ones allocated with an integer LLVM IR
          * type, as they are guaranteed to always hold integers.
          * XXX: Not sure if this exception is worthwhile (or the whole idea of
          * an ADDR register for that matter).
          */
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
         break;

      case TGSI_FILE_PREDICATE:
         assert(idx < LP_MAX_TGSI_PREDS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
         break;

      default:
         /* don't need to declare other vars */
         break;
      }
   }
}


void lp_emit_immediate_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_immediate *imm)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* simply copy the immediate values into the next immediates[] slot */
   unsigned i;
   const uint size = imm->Immediate.NrTokens - 1;
   assert(size <= 4);
   assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
   switch (imm->Immediate.DataType) {
   case TGSI_IMM_FLOAT32:
      for( i = 0; i < size; ++i )
         bld->immediates[bld->num_immediates][i] =
            lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);

      break;
   case TGSI_IMM_UINT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
         bld->immediates[bld->num_immediates][i] =
            LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   case TGSI_IMM_INT32:
      for( i = 0; i < size; ++i ) {
         LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
         bld->immediates[bld->num_immediates][i] =
            LLVMConstBitCast(tmp, bld_base->base.vec_type);
      }

      break;
   }
   for( i = size; i < 4; ++i )
      bld->immediates[bld->num_immediates][i] = bld_base->base.undef;

   bld->num_immediates++;
}

static void
ddx_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL,
                    &emit_data->output[emit_data->chan], NULL);
}

static void
ddy_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
                    &emit_data->output[emit_data->chan]);
}

static void
kilp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kilp(bld, bld_base->pc - 1);
}

static void
kil_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_kil(bld, emit_data->inst, bld_base->pc - 1);
}

static void
tex_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
}

static void
txb_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
            emit_data->output);
}

static void
txd_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
            emit_data->output);
}

static void
txl_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
            emit_data->output);
}

static void
txp_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
            emit_data->output);
}

static void
txq_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   emit_txq(bld, emit_data->inst, emit_data->output);
}

static void
cal_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
                     &bld_base->pc);
}

static void
ret_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
}

static void
brk_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_break(&bld->exec_mask);
}

static void
if_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
                      emit_data->args[0], bld->bld_base.base.zero);
   lp_exec_mask_cond_push(&bld->exec_mask, tmp);
}

static void
bgnloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_bgnloop(&bld->exec_mask);
}

static void
bgnsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_bgnsub(&bld->exec_mask);
}

static void
else_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_invert(&bld->exec_mask);
}

static void
endif_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_cond_pop(&bld->exec_mask);
}

static void
endloop_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
}

static void
endsub_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
}

static void
cont_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   lp_exec_continue(&bld->exec_mask);
}

/* XXX: Refactor and move it to lp_bld_tgsi_action.c
 *
 * XXX: What do the comments about xmm registers mean?  Maybe they are left over
 * from old code, but there is no guarantee that LLVM will use those registers
 * for this code.
 *
 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
 * should be handled by the emit_data->fetch_args function. */
static void
nrm_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp0, tmp1;
   LLVMValueRef tmp4 = NULL;
   LLVMValueRef tmp5 = NULL;
   LLVMValueRef tmp6 = NULL;
   LLVMValueRef tmp7 = NULL;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;

   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
       (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {

      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above).
/* XXX: Refactor and move it to lp_bld_tgsi_action.c
 *
 * XXX: What do the comments about xmm registers mean?  Maybe they are left
 * over from old code, but there is no guarantee that LLVM will use those
 * registers for this code.
 *
 * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
 * should be handled by the emit_data->fetch_args function. */
static void
nrm_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   LLVMValueRef tmp0, tmp1;
   LLVMValueRef tmp4 = NULL;
   LLVMValueRef tmp5 = NULL;
   LLVMValueRef tmp6 = NULL;
   LLVMValueRef tmp7 = NULL;
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;

   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
       (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {

      /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */

      /* xmm4 = src.x */
      /* xmm0 = src.x * src.x */
      tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
         tmp4 = tmp0;
      }
      tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);

      /* xmm5 = src.y */
      /* xmm0 = xmm0 + src.y * src.y */
      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
         tmp5 = tmp1;
      }
      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);

      /* xmm6 = src.z */
      /* xmm0 = xmm0 + src.z * src.z */
      tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
         tmp6 = tmp1;
      }
      tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
      tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);

      if (dims == 4) {
         /* xmm7 = src.w */
         /* xmm0 = xmm0 + src.w * src.w */
         tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
         if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
            tmp7 = tmp1;
         }
         tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
         tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
      }

      /* xmm1 = 1 / sqrt(xmm0) */
      tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);

      /* dst.x = xmm1 * src.x */
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
         emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
      }

      /* dst.y = xmm1 * src.y */
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
         emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
      }

      /* dst.z = xmm1 * src.z */
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
         emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
      }

      /* dst.w = xmm1 * src.w */
      if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4) {
         emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
      }
   }

   /* dst.w = 1.0 */
   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
      emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
   }
}
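/*
 * The prologue/epilogue below deal with indirectly addressed register files.
 * Normally every TEMP/INPUT/OUTPUT channel lives in its own LLVM value, but
 * an indirectly addressed access such as (hypothetical TGSI)
 *
 *    MOV TEMP[ADDR[0].x], CONST[1]
 *
 * needs the whole file laid out in memory so it can be indexed at run time.
 * emit_prologue() therefore allocates flat arrays (file_max * 4 + 4 vectors,
 * one per channel) for the files flagged in info->indirect_files and copies
 * the caller-provided inputs into them; emit_epilogue() points the caller's
 * output slots back at that array storage.
 */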
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                               bld_base->base.vec_type, array_size,
                                               "temp_array");
   }

   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                              bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                 bld_base->base.vec_type, array_size,
                                                 "output_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      assert(bld_base->info->num_inputs
             <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }
}

static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (0) {
      /* for debugging */
      emit_dump_temps(bld);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca
    * array to the output slots specified by the caller */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      unsigned index, chan;
      assert(bld_base->info->num_outputs <=
             bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld_base->info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}
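/*
 * lp_build_tgsi_soa() is the entry point callers use to translate a TGSI
 * shader with this SoA backend.  It fills in a lp_build_tgsi_soa_context:
 * the per-file fetch callbacks and emit_store, the declaration/immediate/
 * prologue/epilogue hooks, and the opcode action table (starting from
 * lp_set_default_actions_cpu() and overriding the control-flow, texture,
 * derivative, KIL and NRM opcodes with the emitters defined above), then
 * hands the token stream to lp_build_tgsi_llvm() to generate the IR.
 */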
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef *pos,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.pos = pos;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;

   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KIL].emit = kil_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILP].emit = kilp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);

   bld.system_values = *system_values;

   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);
   }
}
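#if 0
/*
 * Minimal usage sketch (illustrative only; example_translate_shader() is
 * hypothetical and not part of this file).  It shows roughly how a caller
 * drives lp_build_tgsi_soa(): the gallivm state, shader tokens, vector type,
 * constant buffer pointer, system values, per-channel input values and
 * output pointers, sampler generator and tgsi_shader_info are all prepared
 * by the driver beforehand.  Passing NULL for the kill mask and window
 * position mirrors a vertex-shader style invocation.
 */
static void
example_translate_shader(struct gallivm_state *gallivm,
                         const struct tgsi_token *tokens,
                         struct lp_type vs_type,
                         LLVMValueRef consts_ptr,
                         const struct lp_bld_tgsi_system_values *system_values,
                         const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                         LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                         struct lp_build_sampler_soa *sampler,
                         const struct tgsi_shader_info *info)
{
   lp_build_tgsi_soa(gallivm, tokens, vs_type,
                     NULL,            /* no lp_build_mask_context (no KIL mask) */
                     consts_ptr,
                     system_values,
                     NULL,            /* no window position */
                     inputs,
                     outputs,
                     sampler,
                     info);
}
#endif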