1 /************************************************************************** 2 * 3 * Copyright 2011-2012 Advanced Micro Devices, Inc. 4 * Copyright 2009 VMware, Inc. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * @file 31 * TGSI to LLVM IR translation. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 * @author Tom Stellard <thomas.stellard (at) amd.com> 35 */ 36 37 #ifndef LP_BLD_TGSI_H 38 #define LP_BLD_TGSI_H 39 40 #include "gallivm/lp_bld.h" 41 #include "gallivm/lp_bld_tgsi_action.h" 42 #include "gallivm/lp_bld_limits.h" 43 #include "gallivm/lp_bld_sample.h" 44 #include "lp_bld_type.h" 45 #include "pipe/p_compiler.h" 46 #include "pipe/p_state.h" 47 #include "tgsi/tgsi_exec.h" 48 #include "tgsi/tgsi_scan.h" 49 #include "tgsi/tgsi_info.h" 50 51 #ifdef __cplusplus 52 extern "C" { 53 #endif 54 55 #define LP_CHAN_ALL ~0u 56 57 #define LP_MAX_INSTRUCTIONS 256 58 59 struct tgsi_full_declaration; 60 struct tgsi_full_immediate; 61 struct tgsi_full_instruction; 62 struct tgsi_full_src_register; 63 struct tgsi_opcode_info; 64 struct tgsi_token; 65 struct tgsi_shader_info; 66 struct lp_build_mask_context; 67 struct gallivm_state; 68 struct lp_derivatives; 69 struct lp_build_tgsi_gs_iface; 70 71 72 enum lp_build_tex_modifier { 73 LP_BLD_TEX_MODIFIER_NONE = 0, 74 LP_BLD_TEX_MODIFIER_PROJECTED, 75 LP_BLD_TEX_MODIFIER_LOD_BIAS, 76 LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, 77 LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, 78 LP_BLD_TEX_MODIFIER_LOD_ZERO 79 }; 80 81 82 /** 83 * Describe a channel of a register. 84 * 85 * The value can be a: 86 * - immediate value (i.e. derived from a IMM register) 87 * - CONST[n].x/y/z/w 88 * - IN[n].x/y/z/w 89 * - undetermined (when .file == TGSI_FILE_NULL) 90 * 91 * This is one of the analysis results, and is used to described 92 * the output color in terms of inputs. 93 */ 94 struct lp_tgsi_channel_info 95 { 96 unsigned file:4; /* TGSI_FILE_* */ 97 unsigned swizzle:3; /* PIPE_SWIZZLE_x */ 98 union { 99 uint32_t index; 100 float value; /* for TGSI_FILE_IMMEDIATE */ 101 } u; 102 }; 103 104 105 /** 106 * Describe a texture sampler interpolator. 107 * 108 * The interpolation is described in terms of regular inputs. 109 */ 110 struct lp_tgsi_texture_info 111 { 112 struct lp_tgsi_channel_info coord[4]; 113 unsigned target:8; /* TGSI_TEXTURE_* */ 114 unsigned sampler_unit:8; /* Sampler unit */ 115 unsigned texture_unit:8; /* Texture unit */ 116 unsigned modifier:8; /* LP_BLD_TEX_MODIFIER_* */ 117 }; 118 119 120 struct lp_tgsi_info 121 { 122 struct tgsi_shader_info base; 123 124 /* 125 * Whether any of the texture opcodes access a register file other than 126 * TGSI_FILE_INPUT. 127 * 128 * We could also handle TGSI_FILE_CONST/IMMEDIATE here, but there is little 129 * benefit. 130 */ 131 unsigned indirect_textures:1; 132 133 /* 134 * Whether any of the texture (sample) ocpodes use different sampler 135 * and sampler view unit. 136 */ 137 unsigned sampler_texture_units_different:1; 138 139 /* 140 * Whether any immediate values are outside the range of 0 and 1 141 */ 142 unsigned unclamped_immediates:1; 143 144 /* 145 * Texture opcode description. Aimed at detecting and described direct 146 * texture opcodes. 147 */ 148 unsigned num_texs; 149 struct lp_tgsi_texture_info tex[PIPE_MAX_SAMPLERS]; 150 151 /* 152 * Output description. Aimed at detecting and describing simple blit 153 * shaders. 154 */ 155 struct lp_tgsi_channel_info output[PIPE_MAX_SHADER_OUTPUTS][4]; 156 157 /* 158 * Shortcut pointers into the above (for fragment shaders). 159 */ 160 const struct lp_tgsi_channel_info *cbuf[PIPE_MAX_COLOR_BUFS]; 161 }; 162 163 /** 164 * Reference to system values. 165 */ 166 struct lp_bld_tgsi_system_values { 167 LLVMValueRef instance_id; 168 LLVMValueRef vertex_id; 169 LLVMValueRef vertex_id_nobase; 170 LLVMValueRef prim_id; 171 LLVMValueRef basevertex; 172 LLVMValueRef invocation_id; 173 }; 174 175 176 /** 177 * Sampler code generation interface. 178 * 179 * Although texture sampling is a requirement for TGSI translation, it is 180 * a very different problem with several different approaches to it. This 181 * structure establishes an interface for texture sampling code generation, so 182 * that we can easily use different texture sampling strategies. 183 */ 184 struct lp_build_sampler_soa 185 { 186 void 187 (*destroy)( struct lp_build_sampler_soa *sampler ); 188 189 void 190 (*emit_tex_sample)(const struct lp_build_sampler_soa *sampler, 191 struct gallivm_state *gallivm, 192 const struct lp_sampler_params *params); 193 194 void 195 (*emit_size_query)( const struct lp_build_sampler_soa *sampler, 196 struct gallivm_state *gallivm, 197 const struct lp_sampler_size_query_params *params); 198 }; 199 200 201 struct lp_build_sampler_aos 202 { 203 LLVMValueRef 204 (*emit_fetch_texel)( struct lp_build_sampler_aos *sampler, 205 struct lp_build_context *bld, 206 unsigned target, /* TGSI_TEXTURE_* */ 207 unsigned unit, 208 LLVMValueRef coords, 209 const struct lp_derivatives derivs, 210 enum lp_build_tex_modifier modifier); 211 }; 212 213 214 void 215 lp_build_tgsi_info(const struct tgsi_token *tokens, 216 struct lp_tgsi_info *info); 217 218 219 void 220 lp_build_tgsi_soa(struct gallivm_state *gallivm, 221 const struct tgsi_token *tokens, 222 struct lp_type type, 223 struct lp_build_mask_context *mask, 224 LLVMValueRef consts_ptr, 225 LLVMValueRef const_sizes_ptr, 226 const struct lp_bld_tgsi_system_values *system_values, 227 const LLVMValueRef (*inputs)[4], 228 LLVMValueRef (*outputs)[4], 229 LLVMValueRef context_ptr, 230 LLVMValueRef thread_data_ptr, 231 struct lp_build_sampler_soa *sampler, 232 const struct tgsi_shader_info *info, 233 const struct lp_build_tgsi_gs_iface *gs_iface); 234 235 236 void 237 lp_build_tgsi_aos(struct gallivm_state *gallivm, 238 const struct tgsi_token *tokens, 239 struct lp_type type, 240 const unsigned char swizzles[4], 241 LLVMValueRef consts_ptr, 242 const LLVMValueRef *inputs, 243 LLVMValueRef *outputs, 244 struct lp_build_sampler_aos *sampler, 245 const struct tgsi_shader_info *info); 246 247 248 enum lp_exec_mask_break_type { 249 LP_EXEC_MASK_BREAK_TYPE_LOOP, 250 LP_EXEC_MASK_BREAK_TYPE_SWITCH 251 }; 252 253 254 struct lp_exec_mask { 255 struct lp_build_context *bld; 256 257 boolean has_mask; 258 boolean ret_in_main; 259 260 LLVMTypeRef int_vec_type; 261 262 LLVMValueRef exec_mask; 263 264 LLVMValueRef ret_mask; 265 LLVMValueRef cond_mask; 266 LLVMValueRef switch_mask; /* current switch exec mask */ 267 LLVMValueRef cont_mask; 268 LLVMValueRef break_mask; 269 270 struct function_ctx { 271 int pc; 272 LLVMValueRef ret_mask; 273 274 LLVMValueRef cond_stack[LP_MAX_TGSI_NESTING]; 275 int cond_stack_size; 276 277 /* keep track if break belongs to switch or loop */ 278 enum lp_exec_mask_break_type break_type_stack[LP_MAX_TGSI_NESTING]; 279 enum lp_exec_mask_break_type break_type; 280 281 struct { 282 LLVMValueRef switch_val; 283 LLVMValueRef switch_mask; 284 LLVMValueRef switch_mask_default; 285 boolean switch_in_default; 286 unsigned switch_pc; 287 } switch_stack[LP_MAX_TGSI_NESTING]; 288 int switch_stack_size; 289 LLVMValueRef switch_val; 290 LLVMValueRef switch_mask_default; /* reverse of switch mask used for default */ 291 boolean switch_in_default; /* if switch exec is currently in default */ 292 unsigned switch_pc; /* when used points to default or endswitch-1 */ 293 294 LLVMValueRef loop_limiter; 295 LLVMBasicBlockRef loop_block; 296 LLVMValueRef break_var; 297 struct { 298 LLVMBasicBlockRef loop_block; 299 LLVMValueRef cont_mask; 300 LLVMValueRef break_mask; 301 LLVMValueRef break_var; 302 } loop_stack[LP_MAX_TGSI_NESTING]; 303 int loop_stack_size; 304 305 } *function_stack; 306 int function_stack_size; 307 }; 308 309 struct lp_build_tgsi_inst_list 310 { 311 struct tgsi_full_instruction *instructions; 312 uint max_instructions; 313 uint num_instructions; 314 }; 315 316 unsigned lp_bld_tgsi_list_init(struct lp_build_tgsi_context * bld_base); 317 318 319 unsigned lp_bld_tgsi_add_instruction( 320 struct lp_build_tgsi_context * bld_base, 321 const struct tgsi_full_instruction *inst_to_add); 322 323 324 struct lp_build_tgsi_context; 325 326 327 typedef LLVMValueRef (*lp_build_emit_fetch_fn)(struct lp_build_tgsi_context *, 328 const struct tgsi_full_src_register *, 329 enum tgsi_opcode_type, 330 unsigned); 331 332 struct lp_build_tgsi_context 333 { 334 struct lp_build_context base; 335 336 struct lp_build_context uint_bld; 337 struct lp_build_context int_bld; 338 339 struct lp_build_context dbl_bld; 340 341 struct lp_build_context uint64_bld; 342 struct lp_build_context int64_bld; 343 344 /** This array stores functions that are used to transform TGSI opcodes to 345 * LLVM instructions. 346 */ 347 struct lp_build_tgsi_action op_actions[TGSI_OPCODE_LAST]; 348 349 /* TGSI_OPCODE_RSQ is defined as 1 / sqrt( abs(src0.x) ), rsq_action 350 * should compute 1 / sqrt (src0.x) */ 351 struct lp_build_tgsi_action rsq_action; 352 353 struct lp_build_tgsi_action sqrt_action; 354 355 struct lp_build_tgsi_action drsq_action; 356 357 struct lp_build_tgsi_action dsqrt_action; 358 const struct tgsi_shader_info *info; 359 360 lp_build_emit_fetch_fn emit_fetch_funcs[TGSI_FILE_COUNT]; 361 362 LLVMValueRef (*emit_swizzle)(struct lp_build_tgsi_context *, 363 LLVMValueRef, unsigned, unsigned, unsigned, unsigned); 364 365 366 void (*emit_debug)(struct lp_build_tgsi_context *, 367 const struct tgsi_full_instruction *, 368 const struct tgsi_opcode_info *); 369 370 void (*emit_store)(struct lp_build_tgsi_context *, 371 const struct tgsi_full_instruction *, 372 const struct tgsi_opcode_info *, 373 LLVMValueRef dst[4]); 374 375 void (*emit_declaration)(struct lp_build_tgsi_context *, 376 const struct tgsi_full_declaration *decl); 377 378 void (*emit_immediate)(struct lp_build_tgsi_context *, 379 const struct tgsi_full_immediate *imm); 380 381 382 /* Allow the user to store data in this structure rather than passing it 383 * to every function. */ 384 void * userdata; 385 386 boolean soa; 387 388 int pc; 389 390 struct tgsi_full_instruction *instructions; 391 uint max_instructions; 392 uint num_instructions; 393 394 /** This function allows the user to insert some instructions at the 395 * beginning of the program. It is optional and does not need to be 396 * implemented. 397 */ 398 void (*emit_prologue)(struct lp_build_tgsi_context*); 399 400 /** This function allows the user to insert some instructions at the end of 401 * the program. This callback is intended to be used for emitting 402 * instructions to handle the export for the output registers, but it can 403 * be used for any purpose. Implementing this function is optiona, but 404 * recommended. 405 */ 406 void (*emit_epilogue)(struct lp_build_tgsi_context*); 407 }; 408 409 struct lp_build_tgsi_gs_iface 410 { 411 LLVMValueRef (*fetch_input)(const struct lp_build_tgsi_gs_iface *gs_iface, 412 struct lp_build_tgsi_context * bld_base, 413 boolean is_vindex_indirect, 414 LLVMValueRef vertex_index, 415 boolean is_aindex_indirect, 416 LLVMValueRef attrib_index, 417 LLVMValueRef swizzle_index); 418 void (*emit_vertex)(const struct lp_build_tgsi_gs_iface *gs_iface, 419 struct lp_build_tgsi_context * bld_base, 420 LLVMValueRef (*outputs)[4], 421 LLVMValueRef emitted_vertices_vec); 422 void (*end_primitive)(const struct lp_build_tgsi_gs_iface *gs_iface, 423 struct lp_build_tgsi_context * bld_base, 424 LLVMValueRef verts_per_prim_vec, 425 LLVMValueRef emitted_prims_vec); 426 void (*gs_epilogue)(const struct lp_build_tgsi_gs_iface *gs_iface, 427 struct lp_build_tgsi_context * bld_base, 428 LLVMValueRef total_emitted_vertices_vec, 429 LLVMValueRef emitted_prims_vec); 430 }; 431 432 struct lp_build_tgsi_soa_context 433 { 434 struct lp_build_tgsi_context bld_base; 435 436 /* Builder for scalar elements of shader's data type (float) */ 437 struct lp_build_context elem_bld; 438 439 const struct lp_build_tgsi_gs_iface *gs_iface; 440 LLVMValueRef emitted_prims_vec_ptr; 441 LLVMValueRef total_emitted_vertices_vec_ptr; 442 LLVMValueRef emitted_vertices_vec_ptr; 443 LLVMValueRef max_output_vertices_vec; 444 445 LLVMValueRef consts_ptr; 446 LLVMValueRef const_sizes_ptr; 447 LLVMValueRef consts[LP_MAX_TGSI_CONST_BUFFERS]; 448 LLVMValueRef consts_sizes[LP_MAX_TGSI_CONST_BUFFERS]; 449 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; 450 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS]; 451 LLVMValueRef context_ptr; 452 LLVMValueRef thread_data_ptr; 453 454 const struct lp_build_sampler_soa *sampler; 455 456 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 457 458 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES][TGSI_NUM_CHANNELS]; 459 LLVMValueRef temps[LP_MAX_INLINED_TEMPS][TGSI_NUM_CHANNELS]; 460 LLVMValueRef addr[LP_MAX_TGSI_ADDRS][TGSI_NUM_CHANNELS]; 461 LLVMValueRef preds[LP_MAX_TGSI_PREDS][TGSI_NUM_CHANNELS]; 462 463 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 464 * set in the indirect_files field. 465 * The temps[] array above is unused then. 466 */ 467 LLVMValueRef temps_array; 468 469 /* We allocate/use this array of output if (1 << TGSI_FILE_OUTPUT) is 470 * set in the indirect_files field. 471 * The outputs[] array above is unused then. 472 */ 473 LLVMValueRef outputs_array; 474 475 /* We allocate/use this array of inputs if (1 << TGSI_FILE_INPUT) is 476 * set in the indirect_files field. 477 * The inputs[] array above is unused then. 478 */ 479 LLVMValueRef inputs_array; 480 481 /* We allocate/use this array of temps if (1 << TGSI_FILE_IMMEDIATE) is 482 * set in the indirect_files field. 483 */ 484 LLVMValueRef imms_array; 485 486 487 struct lp_bld_tgsi_system_values system_values; 488 489 /** bitmask indicating which register files are accessed indirectly */ 490 unsigned indirect_files; 491 492 struct lp_build_mask_context *mask; 493 struct lp_exec_mask exec_mask; 494 495 uint num_immediates; 496 boolean use_immediates_array; 497 }; 498 499 void 500 lp_emit_declaration_soa( 501 struct lp_build_tgsi_context *bld, 502 const struct tgsi_full_declaration *decl); 503 504 void lp_emit_immediate_soa( 505 struct lp_build_tgsi_context *bld_base, 506 const struct tgsi_full_immediate *imm); 507 508 boolean 509 lp_emit_instruction_soa( 510 struct lp_build_tgsi_soa_context *bld, 511 const struct tgsi_full_instruction *inst, 512 const struct tgsi_opcode_info *info); 513 514 515 LLVMValueRef 516 lp_get_temp_ptr_soa( 517 struct lp_build_tgsi_soa_context *bld, 518 unsigned index, 519 unsigned chan); 520 521 LLVMValueRef 522 lp_get_output_ptr( 523 struct lp_build_tgsi_soa_context *bld, 524 unsigned index, 525 unsigned chan); 526 527 struct lp_build_tgsi_aos_context 528 { 529 struct lp_build_tgsi_context bld_base; 530 531 /* Builder for integer masks and indices */ 532 struct lp_build_context int_bld; 533 534 /* 535 * AoS swizzle used: 536 * - swizzles[0] = red index 537 * - swizzles[1] = green index 538 * - swizzles[2] = blue index 539 * - swizzles[3] = alpha index 540 */ 541 unsigned char swizzles[4]; 542 unsigned char inv_swizzles[4]; 543 544 LLVMValueRef consts_ptr; 545 const LLVMValueRef *inputs; 546 LLVMValueRef *outputs; 547 548 struct lp_build_sampler_aos *sampler; 549 550 struct tgsi_declaration_sampler_view sv[PIPE_MAX_SHADER_SAMPLER_VIEWS]; 551 552 LLVMValueRef immediates[LP_MAX_INLINED_IMMEDIATES]; 553 LLVMValueRef temps[LP_MAX_INLINED_TEMPS]; 554 LLVMValueRef addr[LP_MAX_TGSI_ADDRS]; 555 LLVMValueRef preds[LP_MAX_TGSI_PREDS]; 556 557 /* We allocate/use this array of temps if (1 << TGSI_FILE_TEMPORARY) is 558 * set in the indirect_files field. 559 * The temps[] array above is unused then. 560 */ 561 LLVMValueRef temps_array; 562 563 /** bitmask indicating which register files are accessed indirectly */ 564 unsigned indirect_files; 565 566 }; 567 568 static inline struct lp_build_tgsi_soa_context * 569 lp_soa_context(struct lp_build_tgsi_context *bld_base) 570 { 571 return (struct lp_build_tgsi_soa_context *)bld_base; 572 } 573 574 static inline struct lp_build_tgsi_aos_context * 575 lp_aos_context(struct lp_build_tgsi_context *bld_base) 576 { 577 return (struct lp_build_tgsi_aos_context *)bld_base; 578 } 579 580 void 581 lp_emit_declaration_aos( 582 struct lp_build_tgsi_aos_context *bld, 583 const struct tgsi_full_declaration *decl); 584 585 586 boolean 587 lp_emit_instruction_aos( 588 struct lp_build_tgsi_aos_context *bld, 589 const struct tgsi_full_instruction *inst, 590 const struct tgsi_opcode_info *info, 591 int *pc); 592 593 void 594 lp_emit_store_aos( 595 struct lp_build_tgsi_aos_context *bld, 596 const struct tgsi_full_instruction *inst, 597 unsigned index, 598 LLVMValueRef value); 599 600 void lp_build_fetch_args( 601 struct lp_build_tgsi_context * bld_base, 602 struct lp_build_emit_data * emit_data); 603 604 LLVMValueRef 605 lp_build_tgsi_inst_llvm_aos( 606 struct lp_build_tgsi_context * bld_base, 607 const struct tgsi_full_instruction *inst); 608 609 void 610 lp_build_tgsi_intrinsic( 611 const struct lp_build_tgsi_action * action, 612 struct lp_build_tgsi_context * bld_base, 613 struct lp_build_emit_data * emit_data); 614 615 LLVMValueRef 616 lp_build_emit_llvm( 617 struct lp_build_tgsi_context *bld_base, 618 unsigned tgsi_opcode, 619 struct lp_build_emit_data * emit_data); 620 621 LLVMValueRef 622 lp_build_emit_llvm_unary( 623 struct lp_build_tgsi_context *bld_base, 624 unsigned tgsi_opcode, 625 LLVMValueRef arg0); 626 627 LLVMValueRef 628 lp_build_emit_llvm_binary( 629 struct lp_build_tgsi_context *bld_base, 630 unsigned tgsi_opcode, 631 LLVMValueRef arg0, 632 LLVMValueRef arg1); 633 634 LLVMValueRef 635 lp_build_emit_llvm_ternary( 636 struct lp_build_tgsi_context *bld_base, 637 unsigned tgsi_opcode, 638 LLVMValueRef arg0, 639 LLVMValueRef arg1, 640 LLVMValueRef arg2); 641 642 boolean 643 lp_build_tgsi_inst_llvm( 644 struct lp_build_tgsi_context * bld_base, 645 const struct tgsi_full_instruction *inst); 646 647 LLVMValueRef 648 lp_build_emit_fetch( 649 struct lp_build_tgsi_context *bld_base, 650 const struct tgsi_full_instruction *inst, 651 unsigned src_op, 652 const unsigned chan_index); 653 654 655 LLVMValueRef 656 lp_build_emit_fetch_texoffset( 657 struct lp_build_tgsi_context *bld_base, 658 const struct tgsi_full_instruction *inst, 659 unsigned tex_off_op, 660 const unsigned chan_index); 661 662 boolean 663 lp_build_tgsi_llvm( 664 struct lp_build_tgsi_context * bld_base, 665 const struct tgsi_token *tokens); 666 667 #ifdef __cplusplus 668 } 669 #endif 670 671 #endif /* LP_BLD_TGSI_H */ 672