1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * TGSI to LLVM IR translation -- AoS. 31 * 32 * FIXME: 33 * - No control flow support: the existing control flow code should be factored 34 * out into from the SoA code into a common module and shared. 35 * - No derivatives. Derivate logic should be pluggable, just like the samplers. 36 * 37 * @author Jose Fonseca <jfonseca (at) vmware.com> 38 */ 39 40 #include "pipe/p_config.h" 41 #include "pipe/p_shader_tokens.h" 42 #include "util/u_debug.h" 43 #include "util/u_math.h" 44 #include "util/u_memory.h" 45 #include "tgsi/tgsi_dump.h" 46 #include "tgsi/tgsi_info.h" 47 #include "tgsi/tgsi_parse.h" 48 #include "tgsi/tgsi_util.h" 49 #include "tgsi/tgsi_scan.h" 50 #include "lp_bld_type.h" 51 #include "lp_bld_const.h" 52 #include "lp_bld_arit.h" 53 #include "lp_bld_logic.h" 54 #include "lp_bld_swizzle.h" 55 #include "lp_bld_flow.h" 56 #include "lp_bld_quad.h" 57 #include "lp_bld_tgsi.h" 58 #include "lp_bld_debug.h" 59 #include "lp_bld_sample.h" 60 61 62 /** 63 * Wrapper around lp_build_swizzle_aos which translates swizzles to another 64 * ordering. 65 */ 66 static LLVMValueRef 67 swizzle_aos(struct lp_build_tgsi_context *bld_base, 68 LLVMValueRef a, 69 unsigned swizzle_x, 70 unsigned swizzle_y, 71 unsigned swizzle_z, 72 unsigned swizzle_w) 73 { 74 unsigned char swizzles[4]; 75 struct lp_build_tgsi_aos_context *bld = lp_aos_context(bld_base); 76 77 assert(swizzle_x < 4); 78 assert(swizzle_y < 4); 79 assert(swizzle_z < 4); 80 assert(swizzle_w < 4); 81 82 swizzles[bld->inv_swizzles[0]] = bld->swizzles[swizzle_x]; 83 swizzles[bld->inv_swizzles[1]] = bld->swizzles[swizzle_y]; 84 swizzles[bld->inv_swizzles[2]] = bld->swizzles[swizzle_z]; 85 swizzles[bld->inv_swizzles[3]] = bld->swizzles[swizzle_w]; 86 87 return lp_build_swizzle_aos(&bld->bld_base.base, a, swizzles); 88 } 89 90 91 static LLVMValueRef 92 swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, 93 LLVMValueRef a, 94 unsigned chan) 95 { 96 chan = bld->swizzles[chan]; 97 return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4); 98 } 99 100 101 static LLVMValueRef 102 emit_fetch_constant( 103 struct lp_build_tgsi_context * bld_base, 104 const struct tgsi_full_src_register * reg, 105 enum tgsi_opcode_type stype, 106 unsigned swizzle) 107 { 108 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 109 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 110 struct lp_type type = bld_base->base.type; 111 LLVMValueRef res; 112 unsigned chan; 113 114 assert(!reg->Register.Indirect); 115 116 /* 117 * Get the constants components 118 */ 119 120 res = bld->bld_base.base.undef; 121 for (chan = 0; chan < 4; ++chan) { 122 LLVMValueRef index; 123 LLVMValueRef scalar_ptr; 124 LLVMValueRef scalar; 125 LLVMValueRef swizzle; 126 127 index = lp_build_const_int32(bld->bld_base.base.gallivm, 128 reg->Register.Index * 4 + chan); 129 130 scalar_ptr = LLVMBuildGEP(builder, bld->consts_ptr, &index, 1, ""); 131 132 scalar = LLVMBuildLoad(builder, scalar_ptr, ""); 133 134 lp_build_name(scalar, "const[%u].%c", reg->Register.Index, "xyzw"[chan]); 135 136 /* 137 * NOTE: constants array is always assumed to be RGBA 138 */ 139 140 swizzle = lp_build_const_int32(bld->bld_base.base.gallivm, 141 bld->swizzles[chan]); 142 143 res = LLVMBuildInsertElement(builder, res, scalar, swizzle, ""); 144 } 145 146 /* 147 * Broadcast the first quaternion to all others. 148 * 149 * XXX: could be factored into a reusable function. 150 */ 151 152 if (type.length > 4) { 153 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 154 unsigned i; 155 156 for (chan = 0; chan < 4; ++chan) { 157 shuffles[chan] = lp_build_const_int32(bld->bld_base.base.gallivm, chan); 158 } 159 160 for (i = 4; i < type.length; ++i) { 161 shuffles[i] = shuffles[i % 4]; 162 } 163 164 res = LLVMBuildShuffleVector(builder, 165 res, bld->bld_base.base.undef, 166 LLVMConstVector(shuffles, type.length), 167 ""); 168 } 169 return res; 170 } 171 172 static LLVMValueRef 173 emit_fetch_immediate( 174 struct lp_build_tgsi_context * bld_base, 175 const struct tgsi_full_src_register * reg, 176 enum tgsi_opcode_type stype, 177 unsigned swizzle) 178 { 179 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 180 LLVMValueRef res = bld->immediates[reg->Register.Index]; 181 assert(res); 182 return res; 183 } 184 185 static LLVMValueRef 186 emit_fetch_input( 187 struct lp_build_tgsi_context * bld_base, 188 const struct tgsi_full_src_register * reg, 189 enum tgsi_opcode_type stype, 190 unsigned swizzle) 191 { 192 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 193 LLVMValueRef res = bld->inputs[reg->Register.Index]; 194 assert(!reg->Register.Indirect); 195 assert(res); 196 return res; 197 } 198 199 static LLVMValueRef 200 emit_fetch_temporary( 201 struct lp_build_tgsi_context * bld_base, 202 const struct tgsi_full_src_register * reg, 203 enum tgsi_opcode_type stype, 204 unsigned swizzle) 205 { 206 struct lp_build_tgsi_aos_context * bld = lp_aos_context(bld_base); 207 LLVMBuilderRef builder = bld_base->base.gallivm->builder; 208 LLVMValueRef temp_ptr = bld->temps[reg->Register.Index]; 209 LLVMValueRef res = LLVMBuildLoad(builder, temp_ptr, ""); 210 assert(!reg->Register.Indirect); 211 if (!res) 212 return bld->bld_base.base.undef; 213 214 return res; 215 } 216 217 /** 218 * Register store. 219 */ 220 void 221 lp_emit_store_aos( 222 struct lp_build_tgsi_aos_context *bld, 223 const struct tgsi_full_instruction *inst, 224 unsigned index, 225 LLVMValueRef value) 226 { 227 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; 228 const struct tgsi_full_dst_register *reg = &inst->Dst[index]; 229 LLVMValueRef mask = NULL; 230 LLVMValueRef ptr; 231 232 /* 233 * Saturate the value 234 */ 235 if (inst->Instruction.Saturate) { 236 value = lp_build_max(&bld->bld_base.base, value, bld->bld_base.base.zero); 237 value = lp_build_min(&bld->bld_base.base, value, bld->bld_base.base.one); 238 } 239 240 /* 241 * Translate the register file 242 */ 243 244 assert(!reg->Register.Indirect); 245 246 switch (reg->Register.File) { 247 case TGSI_FILE_OUTPUT: 248 ptr = bld->outputs[reg->Register.Index]; 249 break; 250 251 case TGSI_FILE_TEMPORARY: 252 ptr = bld->temps[reg->Register.Index]; 253 break; 254 255 case TGSI_FILE_ADDRESS: 256 ptr = bld->addr[reg->Indirect.Index]; 257 break; 258 259 case TGSI_FILE_PREDICATE: 260 ptr = bld->preds[reg->Register.Index]; 261 break; 262 263 default: 264 assert(0); 265 return; 266 } 267 268 if (!ptr) 269 return; 270 /* 271 * Predicate 272 */ 273 274 if (inst->Instruction.Predicate) { 275 LLVMValueRef pred; 276 277 assert(inst->Predicate.Index < LP_MAX_TGSI_PREDS); 278 279 pred = LLVMBuildLoad(builder, 280 bld->preds[inst->Predicate.Index], ""); 281 282 /* 283 * Convert the value to an integer mask. 284 */ 285 pred = lp_build_compare(bld->bld_base.base.gallivm, 286 bld->bld_base.base.type, 287 PIPE_FUNC_NOTEQUAL, 288 pred, 289 bld->bld_base.base.zero); 290 291 if (inst->Predicate.Negate) { 292 pred = LLVMBuildNot(builder, pred, ""); 293 } 294 295 pred = bld->bld_base.emit_swizzle(&bld->bld_base, pred, 296 inst->Predicate.SwizzleX, 297 inst->Predicate.SwizzleY, 298 inst->Predicate.SwizzleZ, 299 inst->Predicate.SwizzleW); 300 301 if (mask) { 302 mask = LLVMBuildAnd(builder, mask, pred, ""); 303 } else { 304 mask = pred; 305 } 306 } 307 308 /* 309 * Writemask 310 */ 311 312 if (reg->Register.WriteMask != TGSI_WRITEMASK_XYZW) { 313 LLVMValueRef writemask; 314 315 writemask = lp_build_const_mask_aos_swizzled(bld->bld_base.base.gallivm, 316 bld->bld_base.base.type, 317 reg->Register.WriteMask, 318 TGSI_NUM_CHANNELS, 319 bld->swizzles); 320 321 if (mask) { 322 mask = LLVMBuildAnd(builder, mask, writemask, ""); 323 } else { 324 mask = writemask; 325 } 326 } 327 328 if (mask) { 329 LLVMValueRef orig_value; 330 331 orig_value = LLVMBuildLoad(builder, ptr, ""); 332 value = lp_build_select(&bld->bld_base.base, 333 mask, value, orig_value); 334 } 335 336 LLVMBuildStore(builder, value, ptr); 337 } 338 339 340 /** 341 * High-level instruction translators. 342 */ 343 344 static LLVMValueRef 345 emit_tex(struct lp_build_tgsi_aos_context *bld, 346 const struct tgsi_full_instruction *inst, 347 enum lp_build_tex_modifier modifier) 348 { 349 unsigned target; 350 unsigned unit; 351 LLVMValueRef coords; 352 struct lp_derivatives derivs = { {NULL}, {NULL} }; 353 354 if (!bld->sampler) { 355 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 356 return bld->bld_base.base.undef; 357 } 358 359 target = inst->Texture.Texture; 360 361 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); 362 363 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) { 364 /* probably not going to work */ 365 derivs.ddx[0] = lp_build_emit_fetch( &bld->bld_base, inst, 1 , LP_CHAN_ALL); 366 derivs.ddy[0] = lp_build_emit_fetch( &bld->bld_base, inst, 2 , LP_CHAN_ALL); 367 unit = inst->Src[3].Register.Index; 368 } 369 else { 370 unit = inst->Src[1].Register.Index; 371 } 372 return bld->sampler->emit_fetch_texel(bld->sampler, 373 &bld->bld_base.base, 374 target, unit, 375 coords, derivs, 376 modifier); 377 } 378 379 380 static LLVMValueRef 381 emit_sample(struct lp_build_tgsi_aos_context *bld, 382 const struct tgsi_full_instruction *inst, 383 enum lp_build_tex_modifier modifier) 384 { 385 unsigned target; 386 unsigned unit; 387 LLVMValueRef coords; 388 struct lp_derivatives derivs = { {NULL}, {NULL} }; 389 390 if (!bld->sampler) { 391 _debug_printf("warning: found texture instruction but no sampler generator supplied\n"); 392 return bld->bld_base.base.undef; 393 } 394 395 coords = lp_build_emit_fetch( &bld->bld_base, inst, 0 , LP_CHAN_ALL); 396 397 /* ignore modifiers, can't handle different sampler / sampler view, etc... */ 398 unit = inst->Src[1].Register.Index; 399 assert(inst->Src[2].Register.Index == unit); 400 401 target = bld->sv[unit].Resource; 402 403 return bld->sampler->emit_fetch_texel(bld->sampler, 404 &bld->bld_base.base, 405 target, unit, 406 coords, derivs, 407 modifier); 408 } 409 410 411 void 412 lp_emit_declaration_aos( 413 struct lp_build_tgsi_aos_context *bld, 414 const struct tgsi_full_declaration *decl) 415 { 416 struct gallivm_state *gallivm = bld->bld_base.base.gallivm; 417 LLVMTypeRef vec_type = lp_build_vec_type(bld->bld_base.base.gallivm, bld->bld_base.base.type); 418 419 unsigned first = decl->Range.First; 420 unsigned last = decl->Range.Last; 421 unsigned idx; 422 423 for (idx = first; idx <= last; ++idx) { 424 switch (decl->Declaration.File) { 425 case TGSI_FILE_TEMPORARY: 426 assert(idx < LP_MAX_INLINED_TEMPS); 427 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) { 428 LLVMValueRef array_size = lp_build_const_int32(gallivm, last + 1); 429 bld->temps_array = lp_build_array_alloca(bld->bld_base.base.gallivm, 430 vec_type, array_size, ""); 431 } else { 432 bld->temps[idx] = lp_build_alloca(gallivm, vec_type, ""); 433 } 434 break; 435 436 case TGSI_FILE_OUTPUT: 437 bld->outputs[idx] = lp_build_alloca(gallivm, vec_type, ""); 438 break; 439 440 case TGSI_FILE_ADDRESS: 441 assert(idx < LP_MAX_TGSI_ADDRS); 442 bld->addr[idx] = lp_build_alloca(gallivm, vec_type, ""); 443 break; 444 445 case TGSI_FILE_PREDICATE: 446 assert(idx < LP_MAX_TGSI_PREDS); 447 bld->preds[idx] = lp_build_alloca(gallivm, vec_type, ""); 448 break; 449 450 case TGSI_FILE_SAMPLER_VIEW: 451 /* 452 * The target stored here MUST match whatever there actually 453 * is in the set sampler views (what about return type?). 454 */ 455 assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS); 456 for (idx = first; idx <= last; ++idx) { 457 bld->sv[idx] = decl->SamplerView; 458 } 459 break; 460 461 default: 462 /* don't need to declare other vars */ 463 break; 464 } 465 } 466 } 467 468 469 /** 470 * Emit LLVM for one TGSI instruction. 471 * \param return TRUE for success, FALSE otherwise 472 */ 473 boolean 474 lp_emit_instruction_aos( 475 struct lp_build_tgsi_aos_context *bld, 476 const struct tgsi_full_instruction *inst, 477 const struct tgsi_opcode_info *info, 478 int *pc) 479 { 480 LLVMValueRef src0, src1, src2; 481 LLVMValueRef tmp0; 482 LLVMValueRef dst0 = NULL; 483 484 /* 485 * Stores and write masks are handled in a general fashion after the long 486 * instruction opcode switch statement. 487 * 488 * Although not stricitly necessary, we avoid generating instructions for 489 * channels which won't be stored, in cases where's that easy. For some 490 * complex instructions, like texture sampling, it is more convenient to 491 * assume a full writemask and then let LLVM optimization passes eliminate 492 * redundant code. 493 */ 494 495 (*pc)++; 496 497 assert(info->num_dst <= 1); 498 if (info->num_dst) { 499 dst0 = bld->bld_base.base.undef; 500 } 501 502 switch (inst->Instruction.Opcode) { 503 case TGSI_OPCODE_ARL: 504 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 505 dst0 = lp_build_floor(&bld->bld_base.base, src0); 506 break; 507 508 case TGSI_OPCODE_MOV: 509 dst0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 510 break; 511 512 case TGSI_OPCODE_LIT: 513 return FALSE; 514 515 case TGSI_OPCODE_RCP: 516 /* TGSI_OPCODE_RECIP */ 517 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 518 dst0 = lp_build_rcp(&bld->bld_base.base, src0); 519 break; 520 521 case TGSI_OPCODE_RSQ: 522 /* TGSI_OPCODE_RECIPSQRT */ 523 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 524 tmp0 = lp_build_abs(&bld->bld_base.base, src0); 525 dst0 = lp_build_rsqrt(&bld->bld_base.base, tmp0); 526 break; 527 528 case TGSI_OPCODE_EXP: 529 return FALSE; 530 531 case TGSI_OPCODE_LOG: 532 return FALSE; 533 534 case TGSI_OPCODE_MUL: 535 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 536 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 537 dst0 = lp_build_mul(&bld->bld_base.base, src0, src1); 538 break; 539 540 case TGSI_OPCODE_ADD: 541 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 542 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 543 dst0 = lp_build_add(&bld->bld_base.base, src0, src1); 544 break; 545 546 case TGSI_OPCODE_DP3: 547 /* TGSI_OPCODE_DOT3 */ 548 return FALSE; 549 550 case TGSI_OPCODE_DP4: 551 /* TGSI_OPCODE_DOT4 */ 552 return FALSE; 553 554 case TGSI_OPCODE_DST: 555 return FALSE; 556 557 case TGSI_OPCODE_MIN: 558 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 559 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 560 dst0 = lp_build_min(&bld->bld_base.base, src0, src1); 561 break; 562 563 case TGSI_OPCODE_MAX: 564 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 565 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 566 dst0 = lp_build_max(&bld->bld_base.base, src0, src1); 567 break; 568 569 case TGSI_OPCODE_SLT: 570 /* TGSI_OPCODE_SETLT */ 571 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 572 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 573 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, src1); 574 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 575 break; 576 577 case TGSI_OPCODE_SGE: 578 /* TGSI_OPCODE_SETGE */ 579 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 580 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 581 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, src0, src1); 582 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 583 break; 584 585 case TGSI_OPCODE_MAD: 586 /* TGSI_OPCODE_MADD */ 587 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 588 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 589 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 590 tmp0 = lp_build_mul(&bld->bld_base.base, src0, src1); 591 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 592 break; 593 594 case TGSI_OPCODE_LRP: 595 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 596 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 597 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 598 tmp0 = lp_build_sub(&bld->bld_base.base, src1, src2); 599 tmp0 = lp_build_mul(&bld->bld_base.base, src0, tmp0); 600 dst0 = lp_build_add(&bld->bld_base.base, tmp0, src2); 601 break; 602 603 case TGSI_OPCODE_DP2A: 604 return FALSE; 605 606 case TGSI_OPCODE_FRC: 607 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 608 tmp0 = lp_build_floor(&bld->bld_base.base, src0); 609 dst0 = lp_build_sub(&bld->bld_base.base, src0, tmp0); 610 break; 611 612 case TGSI_OPCODE_CLAMP: 613 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 614 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 615 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 616 tmp0 = lp_build_max(&bld->bld_base.base, src0, src1); 617 dst0 = lp_build_min(&bld->bld_base.base, tmp0, src2); 618 break; 619 620 case TGSI_OPCODE_FLR: 621 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 622 dst0 = lp_build_floor(&bld->bld_base.base, src0); 623 break; 624 625 case TGSI_OPCODE_ROUND: 626 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 627 dst0 = lp_build_round(&bld->bld_base.base, src0); 628 break; 629 630 case TGSI_OPCODE_EX2: 631 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 632 tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS); 633 dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); 634 break; 635 636 case TGSI_OPCODE_LG2: 637 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 638 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 639 dst0 = lp_build_log2(&bld->bld_base.base, tmp0); 640 break; 641 642 case TGSI_OPCODE_POW: 643 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 644 src0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 645 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 646 src1 = swizzle_scalar_aos(bld, src1, TGSI_SWIZZLE_X); 647 dst0 = lp_build_pow(&bld->bld_base.base, src0, src1); 648 break; 649 650 case TGSI_OPCODE_XPD: 651 return FALSE; 652 653 case TGSI_OPCODE_DPH: 654 return FALSE; 655 656 case TGSI_OPCODE_COS: 657 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 658 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 659 dst0 = lp_build_cos(&bld->bld_base.base, tmp0); 660 break; 661 662 case TGSI_OPCODE_DDX: 663 return FALSE; 664 665 case TGSI_OPCODE_DDY: 666 return FALSE; 667 668 case TGSI_OPCODE_KILL: 669 return FALSE; 670 671 case TGSI_OPCODE_KILL_IF: 672 return FALSE; 673 674 case TGSI_OPCODE_PK2H: 675 return FALSE; 676 break; 677 678 case TGSI_OPCODE_PK2US: 679 return FALSE; 680 break; 681 682 case TGSI_OPCODE_PK4B: 683 return FALSE; 684 break; 685 686 case TGSI_OPCODE_PK4UB: 687 return FALSE; 688 689 case TGSI_OPCODE_SEQ: 690 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 691 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 692 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_EQUAL, src0, src1); 693 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 694 break; 695 696 case TGSI_OPCODE_SGT: 697 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 698 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 699 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GREATER, src0, src1); 700 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 701 break; 702 703 case TGSI_OPCODE_SIN: 704 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 705 tmp0 = swizzle_scalar_aos(bld, src0, TGSI_SWIZZLE_X); 706 dst0 = lp_build_sin(&bld->bld_base.base, tmp0); 707 break; 708 709 case TGSI_OPCODE_SLE: 710 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 711 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 712 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LEQUAL, src0, src1); 713 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 714 break; 715 716 case TGSI_OPCODE_SNE: 717 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 718 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 719 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_NOTEQUAL, src0, src1); 720 dst0 = lp_build_select(&bld->bld_base.base, tmp0, bld->bld_base.base.one, bld->bld_base.base.zero); 721 break; 722 723 case TGSI_OPCODE_TEX: 724 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 725 break; 726 727 case TGSI_OPCODE_TXD: 728 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV); 729 break; 730 731 case TGSI_OPCODE_UP2H: 732 /* deprecated */ 733 assert (0); 734 return FALSE; 735 break; 736 737 case TGSI_OPCODE_UP2US: 738 /* deprecated */ 739 assert(0); 740 return FALSE; 741 break; 742 743 case TGSI_OPCODE_UP4B: 744 /* deprecated */ 745 assert(0); 746 return FALSE; 747 break; 748 749 case TGSI_OPCODE_UP4UB: 750 /* deprecated */ 751 assert(0); 752 return FALSE; 753 break; 754 755 case TGSI_OPCODE_ARR: 756 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 757 dst0 = lp_build_round(&bld->bld_base.base, src0); 758 break; 759 760 case TGSI_OPCODE_CAL: 761 return FALSE; 762 763 case TGSI_OPCODE_RET: 764 /* safe to ignore at end */ 765 break; 766 767 case TGSI_OPCODE_END: 768 *pc = -1; 769 break; 770 771 case TGSI_OPCODE_SSG: 772 /* TGSI_OPCODE_SGN */ 773 tmp0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 774 dst0 = lp_build_sgn(&bld->bld_base.base, tmp0); 775 break; 776 777 case TGSI_OPCODE_CMP: 778 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 779 src1 = lp_build_emit_fetch(&bld->bld_base, inst, 1, LP_CHAN_ALL); 780 src2 = lp_build_emit_fetch(&bld->bld_base, inst, 2, LP_CHAN_ALL); 781 tmp0 = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_LESS, src0, bld->bld_base.base.zero); 782 dst0 = lp_build_select(&bld->bld_base.base, tmp0, src1, src2); 783 break; 784 785 case TGSI_OPCODE_SCS: 786 return FALSE; 787 788 case TGSI_OPCODE_TXB: 789 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); 790 break; 791 792 case TGSI_OPCODE_DIV: 793 assert(0); 794 return FALSE; 795 break; 796 797 case TGSI_OPCODE_DP2: 798 return FALSE; 799 800 case TGSI_OPCODE_TXL: 801 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD); 802 break; 803 804 case TGSI_OPCODE_TXP: 805 dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_PROJECTED); 806 break; 807 808 case TGSI_OPCODE_BRK: 809 return FALSE; 810 811 case TGSI_OPCODE_IF: 812 case TGSI_OPCODE_UIF: 813 return FALSE; 814 815 case TGSI_OPCODE_BGNLOOP: 816 return FALSE; 817 818 case TGSI_OPCODE_BGNSUB: 819 return FALSE; 820 821 case TGSI_OPCODE_ELSE: 822 return FALSE; 823 824 case TGSI_OPCODE_ENDIF: 825 return FALSE; 826 827 case TGSI_OPCODE_ENDLOOP: 828 return FALSE; 829 830 case TGSI_OPCODE_ENDSUB: 831 return FALSE; 832 833 case TGSI_OPCODE_PUSHA: 834 /* deprecated? */ 835 assert(0); 836 return FALSE; 837 break; 838 839 case TGSI_OPCODE_POPA: 840 /* deprecated? */ 841 assert(0); 842 return FALSE; 843 break; 844 845 case TGSI_OPCODE_CEIL: 846 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 847 dst0 = lp_build_ceil(&bld->bld_base.base, src0); 848 break; 849 850 case TGSI_OPCODE_I2F: 851 assert(0); 852 return FALSE; 853 break; 854 855 case TGSI_OPCODE_NOT: 856 assert(0); 857 return FALSE; 858 break; 859 860 case TGSI_OPCODE_TRUNC: 861 src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); 862 dst0 = lp_build_trunc(&bld->bld_base.base, src0); 863 break; 864 865 case TGSI_OPCODE_SHL: 866 assert(0); 867 return FALSE; 868 break; 869 870 case TGSI_OPCODE_ISHR: 871 assert(0); 872 return FALSE; 873 break; 874 875 case TGSI_OPCODE_AND: 876 assert(0); 877 return FALSE; 878 break; 879 880 case TGSI_OPCODE_OR: 881 assert(0); 882 return FALSE; 883 break; 884 885 case TGSI_OPCODE_MOD: 886 assert(0); 887 return FALSE; 888 break; 889 890 case TGSI_OPCODE_XOR: 891 assert(0); 892 return FALSE; 893 break; 894 895 case TGSI_OPCODE_SAD: 896 assert(0); 897 return FALSE; 898 break; 899 900 case TGSI_OPCODE_TXF: 901 assert(0); 902 return FALSE; 903 break; 904 905 case TGSI_OPCODE_TXQ: 906 assert(0); 907 return FALSE; 908 break; 909 910 case TGSI_OPCODE_CONT: 911 return FALSE; 912 913 case TGSI_OPCODE_EMIT: 914 return FALSE; 915 break; 916 917 case TGSI_OPCODE_ENDPRIM: 918 return FALSE; 919 break; 920 921 case TGSI_OPCODE_NOP: 922 break; 923 924 case TGSI_OPCODE_SAMPLE: 925 dst0 = emit_sample(bld, inst, LP_BLD_TEX_MODIFIER_NONE); 926 break; 927 928 default: 929 return FALSE; 930 } 931 932 if (info->num_dst) { 933 lp_emit_store_aos(bld, inst, 0, dst0); 934 } 935 936 return TRUE; 937 } 938 939 940 void 941 lp_build_tgsi_aos(struct gallivm_state *gallivm, 942 const struct tgsi_token *tokens, 943 struct lp_type type, 944 const unsigned char swizzles[4], 945 LLVMValueRef consts_ptr, 946 const LLVMValueRef *inputs, 947 LLVMValueRef *outputs, 948 struct lp_build_sampler_aos *sampler, 949 const struct tgsi_shader_info *info) 950 { 951 struct lp_build_tgsi_aos_context bld; 952 struct tgsi_parse_context parse; 953 uint num_immediates = 0; 954 unsigned chan; 955 int pc = 0; 956 957 /* Setup build context */ 958 memset(&bld, 0, sizeof bld); 959 lp_build_context_init(&bld.bld_base.base, gallivm, type); 960 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); 961 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); 962 lp_build_context_init(&bld.int_bld, gallivm, lp_int_type(type)); 963 964 for (chan = 0; chan < 4; ++chan) { 965 bld.swizzles[chan] = swizzles[chan]; 966 bld.inv_swizzles[swizzles[chan]] = chan; 967 } 968 969 bld.inputs = inputs; 970 bld.outputs = outputs; 971 bld.consts_ptr = consts_ptr; 972 bld.sampler = sampler; 973 bld.indirect_files = info->indirect_files; 974 bld.bld_base.emit_swizzle = swizzle_aos; 975 bld.bld_base.info = info; 976 977 bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; 978 bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; 979 bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; 980 bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; 981 982 /* Set opcode actions */ 983 lp_set_default_actions_cpu(&bld.bld_base); 984 985 if (!lp_bld_tgsi_list_init(&bld.bld_base)) { 986 return; 987 } 988 989 tgsi_parse_init(&parse, tokens); 990 991 while (!tgsi_parse_end_of_tokens(&parse)) { 992 tgsi_parse_token(&parse); 993 994 switch(parse.FullToken.Token.Type) { 995 case TGSI_TOKEN_TYPE_DECLARATION: 996 /* Inputs already interpolated */ 997 lp_emit_declaration_aos(&bld, &parse.FullToken.FullDeclaration); 998 break; 999 1000 case TGSI_TOKEN_TYPE_INSTRUCTION: 1001 /* save expanded instruction */ 1002 lp_bld_tgsi_add_instruction(&bld.bld_base, 1003 &parse.FullToken.FullInstruction); 1004 break; 1005 1006 case TGSI_TOKEN_TYPE_IMMEDIATE: 1007 /* simply copy the immediate values into the next immediates[] slot */ 1008 { 1009 const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1010 float imm[4]; 1011 assert(size <= 4); 1012 assert(num_immediates < LP_MAX_INLINED_IMMEDIATES); 1013 for (chan = 0; chan < 4; ++chan) { 1014 imm[chan] = 0.0f; 1015 } 1016 for (chan = 0; chan < size; ++chan) { 1017 unsigned swizzle = bld.swizzles[chan]; 1018 imm[swizzle] = parse.FullToken.FullImmediate.u[chan].Float; 1019 } 1020 bld.immediates[num_immediates] = 1021 lp_build_const_aos(gallivm, type, 1022 imm[0], imm[1], imm[2], imm[3], 1023 NULL); 1024 num_immediates++; 1025 } 1026 break; 1027 1028 case TGSI_TOKEN_TYPE_PROPERTY: 1029 break; 1030 1031 default: 1032 assert(0); 1033 } 1034 } 1035 1036 while (pc != -1) { 1037 struct tgsi_full_instruction *instr = bld.bld_base.instructions + pc; 1038 const struct tgsi_opcode_info *opcode_info = 1039 tgsi_get_opcode_info(instr->Instruction.Opcode); 1040 if (!lp_emit_instruction_aos(&bld, instr, opcode_info, &pc)) 1041 _debug_printf("warning: failed to translate tgsi opcode %s to LLVM\n", 1042 opcode_info->mnemonic); 1043 } 1044 1045 if (0) { 1046 LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); 1047 LLVMValueRef function = LLVMGetBasicBlockParent(block); 1048 debug_printf("11111111111111111111111111111 \n"); 1049 tgsi_dump(tokens, 0); 1050 lp_debug_dump_value(function); 1051 debug_printf("2222222222222222222222222222 \n"); 1052 } 1053 tgsi_parse_free(&parse); 1054 FREE(bld.bld_base.instructions); 1055 1056 if (0) { 1057 LLVMModuleRef module = LLVMGetGlobalParent( 1058 LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); 1059 LLVMDumpModule(module); 1060 } 1061 1062 } 1063 1064