1 /************************************************************************** 2 * 3 * Copyright 2011-2012 Advanced Micro Devices, Inc. 4 * Copyright 2009 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * @file 31 * TGSI to LLVM IR translation. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 * @author Tom Stellard <thomas.stellard (at) amd.com> 35 */ 36 37 #ifndef LP_BLD_TGSI_H 38 #define LP_BLD_TGSI_H 39 40 #include "gallivm/lp_bld.h" 41 #include "gallivm/lp_bld_tgsi_action.h" 42 #include "gallivm/lp_bld_limits.h" 43 #include "gallivm/lp_bld_sample.h" 44 #include "lp_bld_type.h" 45 #include "pipe/p_compiler.h" 46 #include "pipe/p_state.h" 47 #include "tgsi/tgsi_exec.h" 48 #include "tgsi/tgsi_scan.h" 49 #include "tgsi/tgsi_info.h" 50 51 #ifdef __cplusplus 52 extern "C" { 53 #endif 54 55 #define LP_CHAN_ALL ~0u 56 57 #define LP_MAX_INSTRUCTIONS 256 58 59 struct tgsi_full_declaration; 60 struct tgsi_full_immediate; 61 struct tgsi_full_instruction; 62 struct tgsi_full_src_register; 63 struct tgsi_opcode_info; 64 struct tgsi_token; 65 struct tgsi_shader_info; 66 struct lp_build_mask_context; 67 struct gallivm_state; 68 struct lp_derivatives; 69 struct lp_build_tgsi_gs_iface; 70 71 72 enum lp_build_tex_modifier { 73 LP_BLD_TEX_MODIFIER_NONE = 0, 74 LP_BLD_TEX_MODIFIER_PROJECTED, 75 LP_BLD_TEX_MODIFIER_LOD_BIAS, 76 LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 77 LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 78 LP_BLD_TEX_MODIFIER_LOD_ZERO 79 }; 80 81 82 /** 83 * Describe a channel of a register. 84 * 85 * The value can be a: 86 * - immediate value (i.e. derived from a IMM register) 87 * - CONST[n].x/y/z/w 88 * - IN[n].x/y/z/w 89 * - undetermined (when .file == TGSI_FILE_NULL) 90 * 91 * This is one of the analysis results, and is used to described 92 * the output color in terms of inputs. 93 */ 94 struct lp_tgsi_channel_info 95 { 96 unsigned file:4; /* TGSI_FILE_* */ 97 unsigned swizzle:3; /* PIPE_SWIZZLE_x */ 98 union { 99 uint32_t index; 100 float value; /* for TGSI_FILE_IMMEDIATE */ 101 } u; 102 }; 103 104 105 /** 106 * Describe a texture sampler interpolator. 107 * 108 * The interpolation is described in terms of regular inputs. 109 */ 110 struct lp_tgsi_texture_info 111 { 112 struct lp_tgsi_channel_info coord[4]; 113 unsigned target:8; /* TGSI_TEXTURE_* */ 114 unsigned sampler_unit:8; /* Sampler unit */ 115 unsigned texture_unit:8; /* Texture unit */ 116 unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ 117 }; 118 119 120 struct lp_tgsi_info 121 { 122 struct tgsi_shader_info base; 123 124 /* 125 * Whether any of the texture opcodes access a register file other than 126 * TGSI_FILE_INPUT. 127 * 128 * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little 129 * benefit. 130 */ 131 unsigned indirect_textures:1; 132 133 /* 134 * Whether any of the texture (sample) ocpodes use different sampler 135 * and sampler view unit. 136 */ 137 unsigned sampler_texture_units_different:1; 138 139 /* 140 * Whether any immediate values are outside the range of 0 and 1 141 */ 142 unsigned unclamped_immediates:1; 143 144 /* 145 * Texture opcode description. Aimed at detecting and described direct 146 * texture opcodes. 147 */ 148 unsigned num_texs; 149 struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; 150 151 /* 152 * Output description. Aimed at detecting and describing simple blit 153 * shaders. 154 */ 155 struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; 156 157 /* 158 * Shortcut pointers into the above (for fragment shaders). 159 */ 160 const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; 161 }; 162 163 /** 164 * Reference to system values. 165 */ 166 struct lp_bld_tgsi_system_values { 167 LLVMValueRef instance_id; 168 LLVMValueRef vertex_id; 169 LLVMValueRef vertex_id_nobase; 170 LLVMValueRef prim_id; 171 LLVMValueRef basevertex; 172 LLVMValueRef invocation_id; 173 }; 174 175 176 /** 177 * Sampler code generation interface. 178 * 179 * Although texture sampling is a requirement for TGSI translation, it is 180 * a very different problem with several different approaches to it. This 181 * structure establishes an interface for texture sampling code generation, so 182 * that we can easily use different texture sampling strategies. 183 */ 184 struct lp_build_sampler_soa 185 { 186 void 187 (*destroy)( struct lp_build_sampler_soa *sampler ); 188 189 void 190 (*emit_tex_sample)(const struct lp_build_sampler_soa *sampler, 191 struct gallivm_state *gallivm, 192 const struct lp_sampler_params *params); 193 194 void 195 (*emit_size_query)( const struct lp_build_sampler_soa *sampler, 196 struct gallivm_state *gallivm, 197 const struct lp_sampler_size_query_params *params); 198 }; 199 200 201 struct lp_build_sampler_aos 202 { 203 LLVMValueRef 204 (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler, 205 struct lp_build_context *bld, 206 unsigned target, /* TGSI_TEXTURE_* */ 207 unsigned unit, 208 LLVMValueRef coords, 209 const struct lp_derivatives derivs, 210 enum lp_build_tex_modifier modifier); 211 }; 212 213 214 void 215 lp_build_tgsi_info(const struct tgsi_token *tokens, 216 struct lp_tgsi_info *info); 217 218 219 void 220 lp_build_tgsi_soa(struct gallivm_state *gallivm, 221 const struct tgsi_token *tokens, 222 struct lp_type type, 223 struct lp_build_mask_context *mask, 224 LLVMValueRef consts_ptr, 225 LLVMValueRef const_sizes_ptr, 226 const struct lp_bld_tgsi_system_values *system_values, 227 const LLVMValueRef (*inputs)[4], 228 LLVMValueRef (*outputs)[4], 229 LLVMValueRef context_ptr, 230 LLVMValueRef thread_data_ptr, 231 struct lp_build_sampler_soa *sampler, 232 const struct tgsi_shader_info *info, 233 const struct lp_build_tgsi_gs_iface *gs_iface); 234 235 236 void 237 lp_build_tgsi_aos(struct gallivm_state *gallivm, 238 const struct tgsi_token *tokens, 239 struct lp_type type, 240 const unsigned char swizzles[4], 241 LLVMValueRef consts_ptr, 242 const LLVMValueRef *inputs, 243 LLVMValueRef *outputs, 244 struct lp_build_sampler_aos *sampler, 245 const struct tgsi_shader_info *info); 246 247 248 enum lp_exec_mask_break_type { 249 LP_EXEC_MASK_BREAK_TYPE_LOOP, 250 LP_EXEC_MASK_BREAK_TYPE_SWITCH 251 }; 252 253 254 struct lp_exec_mask { 255 struct lp_build_context *bld; 256 257 boolean has_mask; 258 boolean ret_in_main; 259 260 LLVMTypeRef int_vec_type; 261 262 LLVMValueRef exec_mask; 263 264 LLVMValueRef ret_mask; 265 LLVMValueRef cond_mask; 266 LLVMValueRef switch_mask; /* current switch exec mask */ 267 LLVMValueRef cont_mask; 268 LLVMValueRef break_mask; 269 270 struct function_ctx { 271 int pc; 272 LLVMValueRef ret_mask; 273 274 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 275 int cond_stack_size; 276 277 /* keep track if break belongs to switch or loop */ 278 enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING]; 279 enum lp_exec_mask_break_type break_type; 280 281 struct { 282 LLVMValueRef switch_val; 283 LLVMValueRef switch_mask; 284 LLVMValueRef switch_mask_default; 285 boolean switch_in_default; 286 unsigned switch_pc; 287 } switch_stack[LP_MAX_TGSI_NESTING]; 288 int switch_stack_size; 289 LLVMValueRef switch_val; 290 LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */ 291 boolean switch_in_default; /* if switch exec is currently in default */ 292 unsigned switch_pc; /* when used points to default or endswitch-1 */ 293 294 LLVMValueRef loop_limiter; 295 LLVMBasicBlockRef loop_block; 296 LLVMValueRef break_var; 297 struct { 298 LLVMBasicBlockRef loop_block; 299 LLVMValueRef cont_mask; 300 LLVMValueRef break_mask; 301 LLVMValueRef break_var; 302 } loop_stack[LP_MAX_TGSI_NESTING]; 303 int loop_stack_size; 304 305 } *function_stack; 306 int function_stack_size; 307 }; 308 309 struct lp_build_tgsi_inst_list 310 { 311 struct tgsi_full_instruction *instructions; 312 uint max_instructions; 313 uint num_instructions; 314 }; 315 316 unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base); 317 318 319 unsigned lp_bld_tgsi_add_instruction( 320 struct lp_build_tgsi_context * bld_base, 321 const struct tgsi_full_instruction *inst_to_add); 322 323 324 struct lp_build_tgsi_context; 325 326 327 typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *, 328 const struct tgsi_full_src_register *, 329 enum tgsi_opcode_type, 330 unsigned); 331 332 struct lp_build_tgsi_context 333 { 334 struct lp_build_context base; 335 336 struct lp_build_context uint_bld; 337 struct lp_build_context int_bld; 338 339 struct lp_build_context dbl_bld; 340 341 struct lp_build_context uint64_bld; 342 struct lp_build_context int64_bld; 343 344 /** This array stores functions that are used to transform TGSI opcodes to 345 * LLVM instructions. 346 */ 347 struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST]; 348 349 /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action 350 * should compute 1 / sqrt (src0.x) */ 351 struct lp_build_tgsi_action rsq_action; 352 353 struct lp_build_tgsi_action sqrt_action; 354 355 struct lp_build_tgsi_action drsq_action; 356 357 struct lp_build_tgsi_action dsqrt_action; 358 const struct tgsi_shader_info *info; 359 360 lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT]; 361 362 LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *, 363 LLVMValueRef, unsigned, unsigned, unsigned, unsigned); 364 365 366 void (*emit_debug)(struct lp_build_tgsi_context *, 367 const struct tgsi_full_instruction *, 368 const struct tgsi_opcode_info *); 369 370 void (*emit_store)(struct lp_build_tgsi_context *, 371 const struct tgsi_full_instruction *, 372 const struct tgsi_opcode_info *, 373 unsigned index, 374 LLVMValueRef dst[4]); 375 376 void (*emit_declaration)(struct lp_build_tgsi_context *, 377 const struct tgsi_full_declaration *decl); 378 379 void (*emit_immediate)(struct lp_build_tgsi_context *, 380 const struct tgsi_full_immediate *imm); 381 382 383 /* Allow the user to store data in this structure rather than passing it 384 * to every function. */ 385 void * userdata; 386 387 boolean soa; 388 389 int pc; 390 391 struct tgsi_full_instruction *instructions; 392 uint max_instructions; 393 uint num_instructions; 394 395 /** This function allows the user to insert some instructions at the 396 * beginning of the program. It is optional and does not need to be 397 * implemented. 398 */ 399 void (*emit_prologue)(struct lp_build_tgsi_context*); 400 401 /** This function allows the user to insert some instructions at the end of 402 * the program. This callback is intended to be used for emitting 403 * instructions to handle the export for the output registers, but it can 404 * be used for any purpose. Implementing this function is optiona, but 405 * recommended. 406 */ 407 void (*emit_epilogue)(struct lp_build_tgsi_context*); 408 }; 409 410 struct lp_build_tgsi_gs_iface 411 { 412 LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface, 413 struct lp_build_tgsi_context * bld_base, 414 boolean is_vindex_indirect, 415 LLVMValueRef vertex_index, 416 boolean is_aindex_indirect, 417 LLVMValueRef attrib_index, 418 LLVMValueRef swizzle_index); 419 void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface, 420 struct lp_build_tgsi_context * bld_base, 421 LLVMValueRef (*outputs)[4], 422 LLVMValueRef emitted_vertices_vec); 423 void (*end_primitive)(const struct lp_build_tgsi_gs_iface *gs_iface, 424 struct lp_build_tgsi_context * bld_base, 425 LLVMValueRef verts_per_prim_vec, 426 LLVMValueRef emitted_prims_vec); 427 void (*gs_epilogue)(const struct lp_build_tgsi_gs_iface *gs_iface, 428 struct lp_build_tgsi_context * bld_base, 429 LLVMValueRef total_emitted_vertices_vec, 430 LLVMValueRef emitted_prims_vec); 431 }; 432 433 struct lp_build_tgsi_soa_context 434 { 435 struct lp_build_tgsi_context bld_base; 436 437 /* Builder for scalar elements of shader's data type (float) */ 438 struct lp_build_context elem_bld; 439 440 const struct lp_build_tgsi_gs_iface *gs_iface; 441 LLVMValueRef emitted_prims_vec_ptr; 442 LLVMValueRef total_emitted_vertices_vec_ptr; 443 LLVMValueRef emitted_vertices_vec_ptr; 444 LLVMValueRef max_output_vertices_vec; 445 446 LLVMValueRef consts_ptr; 447 LLVMValueRef const_sizes_ptr; 448 LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS]; 449 LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS]; 450 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; 451 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; 452 LLVMValueRef context_ptr; 453 LLVMValueRef thread_data_ptr; 454 455 const struct lp_build_sampler_soa *sampler; 456 457 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 458 459 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS]; 460 LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS]; 461 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS]; 462 463 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 464 * set in the indirect_files field. 465 * The temps[] array above is unused then. 466 */ 467 LLVMValueRef temps_array; 468 469 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 470 * set in the indirect_files field. 471 * The outputs[] array above is unused then. 472 */ 473 LLVMValueRef outputs_array; 474 475 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is 476 * set in the indirect_files field. 477 * The inputs[] array above is unused then. 478 */ 479 LLVMValueRef inputs_array; 480 481 /* We allocate/use this array of temps if (1 << TGSI_FILE_IMMEDIATE) is 482 * set in the indirect_files field. 483 */ 484 LLVMValueRef imms_array; 485 486 487 struct lp_bld_tgsi_system_values system_values; 488 489 /** bitmask indicating which register files are accessed indirectly */ 490 unsigned indirect_files; 491 492 struct lp_build_mask_context *mask; 493 struct lp_exec_mask exec_mask; 494 495 uint num_immediates; 496 boolean use_immediates_array; 497 }; 498 499 void 500 lp_emit_declaration_soa( 501 struct lp_build_tgsi_context *bld, 502 const struct tgsi_full_declaration *decl); 503 504 void lp_emit_immediate_soa( 505 struct lp_build_tgsi_context *bld_base, 506 const struct tgsi_full_immediate *imm); 507 508 boolean 509 lp_emit_instruction_soa( 510 struct lp_build_tgsi_soa_context *bld, 511 const struct tgsi_full_instruction *inst, 512 const struct tgsi_opcode_info *info); 513 514 515 LLVMValueRef 516 lp_get_temp_ptr_soa( 517 struct lp_build_tgsi_soa_context *bld, 518 unsigned index, 519 unsigned chan); 520 521 LLVMValueRef 522 lp_get_output_ptr( 523 struct lp_build_tgsi_soa_context *bld, 524 unsigned index, 525 unsigned chan); 526 527 struct lp_build_tgsi_aos_context 528 { 529 struct lp_build_tgsi_context bld_base; 530 531 /* Builder for integer masks and indices */ 532 struct lp_build_context int_bld; 533 534 /* 535 * AoS swizzle used: 536 * - swizzles[0] = red index 537 * - swizzles[1] = green index 538 * - swizzles[2] = blue index 539 * - swizzles[3] = alpha index 540 */ 541 unsigned char swizzles[4]; 542 unsigned char inv_swizzles[4]; 543 544 LLVMValueRef consts_ptr; 545 const LLVMValueRef *inputs; 546 LLVMValueRef *outputs; 547 548 struct lp_build_sampler_aos *sampler; 549 550 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 551 552 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES]; 553 LLVMValueRef temps[LP_MAX_INLINED_TEMPS]; 554 LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; 555 556 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 557 * set in the indirect_files field. 558 * The temps[] array above is unused then. 559 */ 560 LLVMValueRef temps_array; 561 562 /** bitmask indicating which register files are accessed indirectly */ 563 unsigned indirect_files; 564 565 }; 566 567 static inline struct lp_build_tgsi_soa_context * 568 lp_soa_context(struct lp_build_tgsi_context *bld_base) 569 { 570 return (struct lp_build_tgsi_soa_context *)bld_base; 571 } 572 573 static inline struct lp_build_tgsi_aos_context * 574 lp_aos_context(struct lp_build_tgsi_context *bld_base) 575 { 576 return (struct lp_build_tgsi_aos_context *)bld_base; 577 } 578 579 void 580 lp_emit_declaration_aos( 581 struct lp_build_tgsi_aos_context *bld, 582 const struct tgsi_full_declaration *decl); 583 584 585 boolean 586 lp_emit_instruction_aos( 587 struct lp_build_tgsi_aos_context *bld, 588 const struct tgsi_full_instruction *inst, 589 const struct tgsi_opcode_info *info, 590 int *pc); 591 592 void 593 lp_emit_store_aos( 594 struct lp_build_tgsi_aos_context *bld, 595 const struct tgsi_full_instruction *inst, 596 unsigned index, 597 LLVMValueRef value); 598 599 void lp_build_fetch_args( 600 struct lp_build_tgsi_context * bld_base, 601 struct lp_build_emit_data * emit_data); 602 603 LLVMValueRef 604 lp_build_tgsi_inst_llvm_aos( 605 struct lp_build_tgsi_context * bld_base, 606 const struct tgsi_full_instruction *inst); 607 608 void 609 lp_build_tgsi_intrinsic( 610 const struct lp_build_tgsi_action * action, 611 struct lp_build_tgsi_context * bld_base, 612 struct lp_build_emit_data * emit_data); 613 614 LLVMValueRef 615 lp_build_emit_llvm( 616 struct lp_build_tgsi_context *bld_base, 617 unsigned tgsi_opcode, 618 struct lp_build_emit_data * emit_data); 619 620 LLVMValueRef 621 lp_build_emit_llvm_unary( 622 struct lp_build_tgsi_context *bld_base, 623 unsigned tgsi_opcode, 624 LLVMValueRef arg0); 625 626 LLVMValueRef 627 lp_build_emit_llvm_binary( 628 struct lp_build_tgsi_context *bld_base, 629 unsigned tgsi_opcode, 630 LLVMValueRef arg0, 631 LLVMValueRef arg1); 632 633 LLVMValueRef 634 lp_build_emit_llvm_ternary( 635 struct lp_build_tgsi_context *bld_base, 636 unsigned tgsi_opcode, 637 LLVMValueRef arg0, 638 LLVMValueRef arg1, 639 LLVMValueRef arg2); 640 641 boolean 642 lp_build_tgsi_inst_llvm( 643 struct lp_build_tgsi_context * bld_base, 644 const struct tgsi_full_instruction *inst); 645 646 LLVMValueRef 647 lp_build_emit_fetch_src( 648 struct lp_build_tgsi_context *bld_base, 649 const struct tgsi_full_src_register *reg, 650 enum tgsi_opcode_type stype, 651 const unsigned chan_index); 652 653 LLVMValueRef 654 lp_build_emit_fetch( 655 struct lp_build_tgsi_context *bld_base, 656 const struct tgsi_full_instruction *inst, 657 unsigned src_op, 658 const unsigned chan_index); 659 660 661 LLVMValueRef 662 lp_build_emit_fetch_texoffset( 663 struct lp_build_tgsi_context *bld_base, 664 const struct tgsi_full_instruction *inst, 665 unsigned tex_off_op, 666 const unsigned chan_index); 667 668 boolean 669 lp_build_tgsi_llvm( 670 struct lp_build_tgsi_context * bld_base, 671 const struct tgsi_token *tokens); 672 673 #ifdef __cplusplus 674 } 675 #endif 676 677 #endif /* LP_BLD_TGSI_H */ 678