1 2 /* 3 * Copyright 2012 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Tom Stellard <thomas.stellard (at) amd.com> 26 * Michel Dnzer <michel.daenzer (at) amd.com> 27 * Christian Knig <christian.koenig (at) amd.com> 28 */ 29 30 #include "gallivm/lp_bld_tgsi_action.h" 31 #include "gallivm/lp_bld_const.h" 32 #include "gallivm/lp_bld_gather.h" 33 #include "gallivm/lp_bld_intr.h" 34 #include "gallivm/lp_bld_tgsi.h" 35 #include "radeon_llvm.h" 36 #include "radeon_llvm_emit.h" 37 #include "tgsi/tgsi_info.h" 38 #include "tgsi/tgsi_parse.h" 39 #include "tgsi/tgsi_scan.h" 40 #include "tgsi/tgsi_dump.h" 41 42 #include "radeonsi_pipe.h" 43 #include "radeonsi_shader.h" 44 #include "si_state.h" 45 #include "sid.h" 46 47 #include <assert.h> 48 #include <errno.h> 49 #include <stdio.h> 50 51 /* 52 static ps_remap_inputs( 53 struct tgsi_llvm_context * tl_ctx, 54 unsigned tgsi_index, 55 unsigned tgsi_chan) 56 { 57 : 58 } 59 60 struct si_input 61 { 62 struct list_head head; 63 unsigned tgsi_index; 64 unsigned tgsi_chan; 65 unsigned order; 66 }; 67 */ 68 69 70 struct si_shader_context 71 { 72 struct radeon_llvm_context radeon_bld; 73 struct r600_context *rctx; 74 struct tgsi_parse_context parse; 75 struct tgsi_token * tokens; 76 struct si_pipe_shader *shader; 77 unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ 78 /* unsigned num_inputs; */ 79 /* struct list_head inputs; */ 80 /* unsigned * input_mappings *//* From TGSI to SI hw */ 81 /* struct tgsi_shader_info info;*/ 82 }; 83 84 static struct si_shader_context * si_shader_context( 85 struct lp_build_tgsi_context * bld_base) 86 { 87 return (struct si_shader_context *)bld_base; 88 } 89 90 91 #define PERSPECTIVE_BASE 0 92 #define LINEAR_BASE 9 93 94 #define SAMPLE_OFFSET 0 95 #define CENTER_OFFSET 2 96 #define CENTROID_OFSET 4 97 98 #define USE_SGPR_MAX_SUFFIX_LEN 5 99 #define CONST_ADDR_SPACE 2 100 #define USER_SGPR_ADDR_SPACE 8 101 102 enum sgpr_type { 103 SGPR_CONST_PTR_F32, 104 SGPR_CONST_PTR_V4I32, 105 SGPR_CONST_PTR_V8I32, 106 SGPR_I32, 107 SGPR_I64 108 }; 109 110 /** 111 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad 112 * 113 * @param offset The offset parameter specifies the number of 114 * elements to offset, not the number of bytes or dwords. An element is the 115 * the type pointed to by the base_ptr parameter (e.g. int is the element of 116 * an int* pointer) 117 * 118 * When LLVM lowers the load instruction, it will convert the element offset 119 * into a dword offset automatically. 120 * 121 */ 122 static LLVMValueRef build_indexed_load( 123 struct gallivm_state * gallivm, 124 LLVMValueRef base_ptr, 125 LLVMValueRef offset) 126 { 127 LLVMValueRef computed_ptr = LLVMBuildGEP( 128 gallivm->builder, base_ptr, &offset, 1, ""); 129 130 return LLVMBuildLoad(gallivm->builder, computed_ptr, ""); 131 } 132 133 /** 134 * Load a value stored in one of the user SGPRs 135 * 136 * @param sgpr This is the sgpr to load the value from. If you need to load a 137 * value that is stored in consecutive SGPR registers (e.g. a 64-bit pointer), 138 * then you should pass the index of the first SGPR that holds the value. For 139 * example, if you want to load a pointer that is stored in SGPRs 2 and 3, then 140 * use pass 2 for the sgpr parameter. 141 * 142 * The value of the sgpr parameter must also be aligned to the width of the type 143 * being loaded, so that the sgpr parameter is divisible by the dword width of the 144 * type. For example, if the value being loaded is two dwords wide, then the sgpr 145 * parameter must be divisible by two. 146 */ 147 static LLVMValueRef use_sgpr( 148 struct gallivm_state * gallivm, 149 enum sgpr_type type, 150 unsigned sgpr) 151 { 152 LLVMValueRef sgpr_index; 153 LLVMTypeRef ret_type; 154 LLVMValueRef ptr; 155 156 sgpr_index = lp_build_const_int32(gallivm, sgpr); 157 158 switch (type) { 159 case SGPR_CONST_PTR_F32: 160 assert(sgpr % 2 == 0); 161 ret_type = LLVMFloatTypeInContext(gallivm->context); 162 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 163 break; 164 165 case SGPR_I32: 166 ret_type = LLVMInt32TypeInContext(gallivm->context); 167 break; 168 169 case SGPR_I64: 170 assert(sgpr % 2 == 0); 171 ret_type= LLVMInt64TypeInContext(gallivm->context); 172 break; 173 174 case SGPR_CONST_PTR_V4I32: 175 assert(sgpr % 2 == 0); 176 ret_type = LLVMInt32TypeInContext(gallivm->context); 177 ret_type = LLVMVectorType(ret_type, 4); 178 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 179 break; 180 181 case SGPR_CONST_PTR_V8I32: 182 assert(sgpr % 2 == 0); 183 ret_type = LLVMInt32TypeInContext(gallivm->context); 184 ret_type = LLVMVectorType(ret_type, 8); 185 ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE); 186 break; 187 188 default: 189 assert(!"Unsupported SGPR type in use_sgpr()"); 190 return NULL; 191 } 192 193 ret_type = LLVMPointerType(ret_type, USER_SGPR_ADDR_SPACE); 194 ptr = LLVMBuildIntToPtr(gallivm->builder, sgpr_index, ret_type, ""); 195 return LLVMBuildLoad(gallivm->builder, ptr, ""); 196 } 197 198 static void declare_input_vs( 199 struct si_shader_context * si_shader_ctx, 200 unsigned input_index, 201 const struct tgsi_full_declaration *decl) 202 { 203 LLVMValueRef t_list_ptr; 204 LLVMValueRef t_offset; 205 LLVMValueRef t_list; 206 LLVMValueRef attribute_offset; 207 LLVMValueRef buffer_index_reg; 208 LLVMValueRef args[3]; 209 LLVMTypeRef vec4_type; 210 LLVMValueRef input; 211 struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 212 struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base; 213 struct r600_context *rctx = si_shader_ctx->rctx; 214 //struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index]; 215 unsigned chan; 216 217 /* Load the T list */ 218 /* XXX: Communicate with the rest of the driver about which SGPR the T# 219 * list pointer is going to be stored in. Hard code to SGPR[6:7] for 220 * now */ 221 t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 6); 222 223 t_offset = lp_build_const_int32(base->gallivm, input_index); 224 225 t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset); 226 227 /* Build the attribute offset */ 228 attribute_offset = lp_build_const_int32(base->gallivm, 0); 229 230 /* Load the buffer index is always, which is always stored in VGPR0 231 * for Vertex Shaders */ 232 buffer_index_reg = build_intrinsic(base->gallivm->builder, 233 "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0, 234 LLVMReadNoneAttribute); 235 236 vec4_type = LLVMVectorType(base->elem_type, 4); 237 args[0] = t_list; 238 args[1] = attribute_offset; 239 args[2] = buffer_index_reg; 240 input = lp_build_intrinsic(base->gallivm->builder, 241 "llvm.SI.vs.load.input", vec4_type, args, 3); 242 243 /* Break up the vec4 into individual components */ 244 for (chan = 0; chan < 4; chan++) { 245 LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan); 246 /* XXX: Use a helper function for this. There is one in 247 * tgsi_llvm.c. */ 248 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] = 249 LLVMBuildExtractElement(base->gallivm->builder, 250 input, llvm_chan, ""); 251 } 252 } 253 254 static void declare_input_fs( 255 struct si_shader_context * si_shader_ctx, 256 unsigned input_index, 257 const struct tgsi_full_declaration *decl) 258 { 259 const char * intr_name; 260 unsigned chan; 261 struct lp_build_context * base = 262 &si_shader_ctx->radeon_bld.soa.bld_base.base; 263 struct gallivm_state * gallivm = base->gallivm; 264 265 /* This value is: 266 * [15:0] NewPrimMask (Bit mask for each quad. It is set it the 267 * quad begins a new primitive. Bit 0 always needs 268 * to be unset) 269 * [32:16] ParamOffset 270 * 271 */ 272 /* XXX: This register number must be identical to the S_00B02C_USER_SGPR 273 * register field value 274 */ 275 LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6); 276 277 278 /* XXX: Is this the input_index? */ 279 LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index); 280 281 /* XXX: Handle all possible interpolation modes */ 282 switch (decl->Interp.Interpolate) { 283 case TGSI_INTERPOLATE_COLOR: 284 /* XXX: Flat shading hangs the GPU */ 285 if (si_shader_ctx->rctx->queued.named.rasterizer && 286 si_shader_ctx->rctx->queued.named.rasterizer->flatshade) { 287 #if 0 288 intr_name = "llvm.SI.fs.interp.constant"; 289 #else 290 intr_name = "llvm.SI.fs.interp.linear.center"; 291 #endif 292 } else { 293 if (decl->Interp.Centroid) 294 intr_name = "llvm.SI.fs.interp.persp.centroid"; 295 else 296 intr_name = "llvm.SI.fs.interp.persp.center"; 297 } 298 break; 299 case TGSI_INTERPOLATE_CONSTANT: 300 /* XXX: Flat shading hangs the GPU */ 301 #if 0 302 intr_name = "llvm.SI.fs.interp.constant"; 303 break; 304 #endif 305 case TGSI_INTERPOLATE_LINEAR: 306 if (decl->Interp.Centroid) 307 intr_name = "llvm.SI.fs.interp.linear.centroid"; 308 else 309 intr_name = "llvm.SI.fs.interp.linear.center"; 310 break; 311 case TGSI_INTERPOLATE_PERSPECTIVE: 312 if (decl->Interp.Centroid) 313 intr_name = "llvm.SI.fs.interp.persp.centroid"; 314 else 315 intr_name = "llvm.SI.fs.interp.persp.center"; 316 break; 317 default: 318 fprintf(stderr, "Warning: Unhandled interpolation mode.\n"); 319 return; 320 } 321 322 /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */ 323 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 324 LLVMValueRef args[3]; 325 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan); 326 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan); 327 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context); 328 args[0] = llvm_chan; 329 args[1] = attr_number; 330 args[2] = params; 331 si_shader_ctx->radeon_bld.inputs[soa_index] = 332 build_intrinsic(base->gallivm->builder, intr_name, 333 input_type, args, 3, LLVMReadOnlyAttribute); 334 } 335 } 336 337 static void declare_input( 338 struct radeon_llvm_context * radeon_bld, 339 unsigned input_index, 340 const struct tgsi_full_declaration *decl) 341 { 342 struct si_shader_context * si_shader_ctx = 343 si_shader_context(&radeon_bld->soa.bld_base); 344 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 345 declare_input_vs(si_shader_ctx, input_index, decl); 346 } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { 347 declare_input_fs(si_shader_ctx, input_index, decl); 348 } else { 349 fprintf(stderr, "Warning: Unsupported shader type,\n"); 350 } 351 } 352 353 static LLVMValueRef fetch_constant( 354 struct lp_build_tgsi_context * bld_base, 355 const struct tgsi_full_src_register *reg, 356 enum tgsi_opcode_type type, 357 unsigned swizzle) 358 { 359 struct lp_build_context * base = &bld_base->base; 360 361 LLVMValueRef const_ptr; 362 LLVMValueRef offset; 363 LLVMValueRef load; 364 365 /* XXX: Assume the pointer to the constant buffer is being stored in 366 * SGPR[0:1] */ 367 const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0); 368 369 /* XXX: This assumes that the constant buffer is not packed, so 370 * CONST[0].x will have an offset of 0 and CONST[1].x will have an 371 * offset of 4. */ 372 offset = lp_build_const_int32(base->gallivm, 373 (reg->Register.Index * 4) + swizzle); 374 375 load = build_indexed_load(base->gallivm, const_ptr, offset); 376 return bitcast(bld_base, type, load); 377 } 378 379 /* Initialize arguments for the shader export intrinsic */ 380 static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, 381 struct tgsi_full_declaration *d, 382 unsigned index, 383 unsigned target, 384 LLVMValueRef *args) 385 { 386 struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); 387 struct lp_build_context *uint = 388 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 389 struct lp_build_context *base = &bld_base->base; 390 unsigned compressed = 0; 391 unsigned chan; 392 393 if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { 394 int cbuf = target - V_008DFC_SQ_EXP_MRT; 395 396 if (cbuf >= 0 && cbuf < 8) { 397 struct r600_context *rctx = si_shader_ctx->rctx; 398 compressed = (rctx->export_16bpc >> cbuf) & 0x1; 399 } 400 } 401 402 if (compressed) { 403 /* Pixel shader needs to pack output values before export */ 404 for (chan = 0; chan < 2; chan++ ) { 405 LLVMValueRef *out_ptr = 406 si_shader_ctx->radeon_bld.soa.outputs[index]; 407 args[0] = LLVMBuildLoad(base->gallivm->builder, 408 out_ptr[2 * chan], ""); 409 args[1] = LLVMBuildLoad(base->gallivm->builder, 410 out_ptr[2 * chan + 1], ""); 411 args[chan + 5] = 412 build_intrinsic(base->gallivm->builder, 413 "llvm.SI.packf16", 414 LLVMInt32TypeInContext(base->gallivm->context), 415 args, 2, 416 LLVMReadNoneAttribute); 417 args[chan + 7] = args[chan + 5]; 418 } 419 420 /* Set COMPR flag */ 421 args[4] = uint->one; 422 } else { 423 for (chan = 0; chan < 4; chan++ ) { 424 LLVMValueRef out_ptr = 425 si_shader_ctx->radeon_bld.soa.outputs[index][chan]; 426 /* +5 because the first output value will be 427 * the 6th argument to the intrinsic. */ 428 args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, 429 out_ptr, ""); 430 } 431 432 /* Clear COMPR flag */ 433 args[4] = uint->zero; 434 } 435 436 /* XXX: This controls which components of the output 437 * registers actually get exported. (e.g bit 0 means export 438 * X component, bit 1 means export Y component, etc.) I'm 439 * hard coding this to 0xf for now. In the future, we might 440 * want to do something else. */ 441 args[0] = lp_build_const_int32(base->gallivm, 0xf); 442 443 /* Specify whether the EXEC mask represents the valid mask */ 444 args[1] = uint->zero; 445 446 /* Specify whether this is the last export */ 447 args[2] = uint->zero; 448 449 /* Specify the target we are exporting */ 450 args[3] = lp_build_const_int32(base->gallivm, target); 451 452 /* XXX: We probably need to keep track of the output 453 * values, so we know what we are passing to the next 454 * stage. */ 455 } 456 457 /* XXX: This is partially implemented for VS only at this point. It is not complete */ 458 static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) 459 { 460 struct si_shader_context * si_shader_ctx = si_shader_context(bld_base); 461 struct si_shader * shader = &si_shader_ctx->shader->shader; 462 struct lp_build_context * base = &bld_base->base; 463 struct lp_build_context * uint = 464 &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; 465 struct tgsi_parse_context *parse = &si_shader_ctx->parse; 466 LLVMValueRef last_args[9] = { 0 }; 467 unsigned color_count = 0; 468 unsigned param_count = 0; 469 470 while (!tgsi_parse_end_of_tokens(parse)) { 471 struct tgsi_full_declaration *d = 472 &parse->FullToken.FullDeclaration; 473 LLVMValueRef args[9]; 474 unsigned target; 475 unsigned index; 476 int i; 477 478 tgsi_parse_token(parse); 479 if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION) 480 continue; 481 482 switch (d->Declaration.File) { 483 case TGSI_FILE_INPUT: 484 i = shader->ninput++; 485 shader->input[i].name = d->Semantic.Name; 486 shader->input[i].sid = d->Semantic.Index; 487 shader->input[i].interpolate = d->Interp.Interpolate; 488 shader->input[i].centroid = d->Interp.Centroid; 489 continue; 490 491 case TGSI_FILE_OUTPUT: 492 i = shader->noutput++; 493 shader->output[i].name = d->Semantic.Name; 494 shader->output[i].sid = d->Semantic.Index; 495 shader->output[i].interpolate = d->Interp.Interpolate; 496 break; 497 498 default: 499 continue; 500 } 501 502 for (index = d->Range.First; index <= d->Range.Last; index++) { 503 /* Select the correct target */ 504 switch(d->Semantic.Name) { 505 case TGSI_SEMANTIC_POSITION: 506 target = V_008DFC_SQ_EXP_POS; 507 break; 508 case TGSI_SEMANTIC_COLOR: 509 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { 510 target = V_008DFC_SQ_EXP_PARAM + param_count; 511 shader->output[i].param_offset = param_count; 512 param_count++; 513 } else { 514 target = V_008DFC_SQ_EXP_MRT + color_count; 515 color_count++; 516 } 517 break; 518 case TGSI_SEMANTIC_GENERIC: 519 target = V_008DFC_SQ_EXP_PARAM + param_count; 520 shader->output[i].param_offset = param_count; 521 param_count++; 522 break; 523 default: 524 target = 0; 525 fprintf(stderr, 526 "Warning: SI unhandled output type:%d\n", 527 d->Semantic.Name); 528 } 529 530 si_llvm_init_export_args(bld_base, d, index, target, args); 531 532 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ? 533 (d->Semantic.Name == TGSI_SEMANTIC_POSITION) : 534 (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) { 535 if (last_args[0]) { 536 lp_build_intrinsic(base->gallivm->builder, 537 "llvm.SI.export", 538 LLVMVoidTypeInContext(base->gallivm->context), 539 last_args, 9); 540 } 541 542 memcpy(last_args, args, sizeof(args)); 543 } else { 544 lp_build_intrinsic(base->gallivm->builder, 545 "llvm.SI.export", 546 LLVMVoidTypeInContext(base->gallivm->context), 547 args, 9); 548 } 549 550 } 551 } 552 553 if (!last_args[0]) { 554 assert(si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 555 556 /* Specify which components to enable */ 557 last_args[0] = lp_build_const_int32(base->gallivm, 0x0); 558 559 /* Specify the target we are exporting */ 560 last_args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT); 561 562 /* Set COMPR flag to zero to export data as 32-bit */ 563 last_args[4] = uint->zero; 564 565 /* dummy bits */ 566 last_args[5]= uint->zero; 567 last_args[6]= uint->zero; 568 last_args[7]= uint->zero; 569 last_args[8]= uint->zero; 570 } 571 572 /* Specify whether the EXEC mask represents the valid mask */ 573 last_args[1] = lp_build_const_int32(base->gallivm, 574 si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT); 575 576 /* Specify that this is the last export */ 577 last_args[2] = lp_build_const_int32(base->gallivm, 1); 578 579 lp_build_intrinsic(base->gallivm->builder, 580 "llvm.SI.export", 581 LLVMVoidTypeInContext(base->gallivm->context), 582 last_args, 9); 583 584 /* XXX: Look up what this function does */ 585 /* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/ 586 } 587 588 static void tex_fetch_args( 589 struct lp_build_tgsi_context * bld_base, 590 struct lp_build_emit_data * emit_data) 591 { 592 const struct tgsi_full_instruction * inst = emit_data->inst; 593 LLVMValueRef ptr; 594 LLVMValueRef offset; 595 596 /* WriteMask */ 597 /* XXX: should be optimized using emit_data->inst->Dst[0].Register.WriteMask*/ 598 emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm, 0xf); 599 600 /* Coordinates */ 601 /* XXX: Not all sample instructions need 4 address arguments. */ 602 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 603 LLVMValueRef src_w; 604 unsigned chan; 605 LLVMValueRef coords[4]; 606 607 emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4); 608 src_w = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W); 609 610 for (chan = 0; chan < 3; chan++ ) { 611 LLVMValueRef arg = lp_build_emit_fetch(bld_base, 612 emit_data->inst, 0, chan); 613 coords[chan] = lp_build_emit_llvm_binary(bld_base, 614 TGSI_OPCODE_DIV, 615 arg, src_w); 616 } 617 coords[3] = bld_base->base.one; 618 emit_data->args[1] = lp_build_gather_values(bld_base->base.gallivm, 619 coords, 4); 620 } else 621 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst, 622 0, LP_CHAN_ALL); 623 624 /* Resource */ 625 ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 4); 626 offset = lp_build_const_int32(bld_base->base.gallivm, 627 emit_data->inst->Src[1].Register.Index); 628 emit_data->args[2] = build_indexed_load(bld_base->base.gallivm, 629 ptr, offset); 630 631 /* Sampler */ 632 ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 2); 633 offset = lp_build_const_int32(bld_base->base.gallivm, 634 emit_data->inst->Src[1].Register.Index); 635 emit_data->args[3] = build_indexed_load(bld_base->base.gallivm, 636 ptr, offset); 637 638 /* Dimensions */ 639 /* XXX: We might want to pass this information to the shader at some. */ 640 /* emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm, 641 emit_data->inst->Texture.Texture); 642 */ 643 644 emit_data->arg_count = 4; 645 /* XXX: To optimize, we could use a float or v2f32, if the last bits of 646 * the writemask are clear */ 647 emit_data->dst_type = LLVMVectorType( 648 LLVMFloatTypeInContext(bld_base->base.gallivm->context), 649 4); 650 } 651 652 static const struct lp_build_tgsi_action tex_action = { 653 .fetch_args = tex_fetch_args, 654 .emit = lp_build_tgsi_intrinsic, 655 .intr_name = "llvm.SI.sample" 656 }; 657 658 659 int si_pipe_shader_create( 660 struct pipe_context *ctx, 661 struct si_pipe_shader *shader) 662 { 663 struct r600_context *rctx = (struct r600_context*)ctx; 664 struct si_pipe_shader_selector *sel = shader->selector; 665 struct si_shader_context si_shader_ctx; 666 struct tgsi_shader_info shader_info; 667 struct lp_build_tgsi_context * bld_base; 668 LLVMModuleRef mod; 669 unsigned char * inst_bytes; 670 unsigned inst_byte_count; 671 unsigned i; 672 uint32_t *ptr; 673 bool dump; 674 675 dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE); 676 677 memset(&si_shader_ctx.radeon_bld, 0, sizeof(si_shader_ctx.radeon_bld)); 678 radeon_llvm_context_init(&si_shader_ctx.radeon_bld); 679 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base; 680 681 tgsi_scan_shader(sel->tokens, &shader_info); 682 bld_base->info = &shader_info; 683 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant; 684 bld_base->emit_epilogue = si_llvm_emit_epilogue; 685 686 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; 687 bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action; 688 689 si_shader_ctx.radeon_bld.load_input = declare_input; 690 si_shader_ctx.tokens = sel->tokens; 691 tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens); 692 si_shader_ctx.shader = shader; 693 si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor; 694 si_shader_ctx.rctx = rctx; 695 696 shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs; 697 698 /* Dump TGSI code before doing TGSI->LLVM conversion in case the 699 * conversion fails. */ 700 if (dump) { 701 tgsi_dump(sel->tokens, 0); 702 } 703 704 if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { 705 fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n"); 706 return -EINVAL; 707 } 708 709 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld); 710 711 mod = bld_base->base.gallivm->module; 712 if (dump) { 713 LLVMDumpModule(mod); 714 } 715 radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", dump); 716 if (dump) { 717 fprintf(stderr, "SI CODE:\n"); 718 for (i = 0; i < inst_byte_count; i+=4 ) { 719 fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3], 720 inst_bytes[i + 2], inst_bytes[i + 1], 721 inst_bytes[i]); 722 } 723 } 724 725 shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes); 726 shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4)); 727 shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8)); 728 729 radeon_llvm_dispose(&si_shader_ctx.radeon_bld); 730 tgsi_parse_free(&si_shader_ctx.parse); 731 732 /* copy new shader */ 733 si_resource_reference(&shader->bo, NULL); 734 shader->bo = si_resource_create_custom(ctx->screen, PIPE_USAGE_IMMUTABLE, 735 inst_byte_count - 12); 736 if (shader->bo == NULL) { 737 return -ENOMEM; 738 } 739 740 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); 741 if (0 /*R600_BIG_ENDIAN*/) { 742 for (i = 0; i < (inst_byte_count-12)/4; ++i) { 743 ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4)); 744 } 745 } else { 746 memcpy(ptr, inst_bytes + 12, inst_byte_count - 12); 747 } 748 rctx->ws->buffer_unmap(shader->bo->cs_buf); 749 750 free(inst_bytes); 751 752 return 0; 753 } 754 755 void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) 756 { 757 si_resource_reference(&shader->bo, NULL); 758 } 759