1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28 #include "draw_llvm.h" 29 30 #include "draw_context.h" 31 #include "draw_vs.h" 32 #include "draw_gs.h" 33 34 #include "gallivm/lp_bld_arit.h" 35 #include "gallivm/lp_bld_arit_overflow.h" 36 #include "gallivm/lp_bld_bitarit.h" 37 #include "gallivm/lp_bld_gather.h" 38 #include "gallivm/lp_bld_logic.h" 39 #include "gallivm/lp_bld_const.h" 40 #include "gallivm/lp_bld_swizzle.h" 41 #include "gallivm/lp_bld_struct.h" 42 #include "gallivm/lp_bld_type.h" 43 #include "gallivm/lp_bld_flow.h" 44 #include "gallivm/lp_bld_debug.h" 45 #include "gallivm/lp_bld_tgsi.h" 46 #include "gallivm/lp_bld_printf.h" 47 #include "gallivm/lp_bld_intr.h" 48 #include "gallivm/lp_bld_init.h" 49 #include "gallivm/lp_bld_type.h" 50 #include "gallivm/lp_bld_pack.h" 51 #include "gallivm/lp_bld_format.h" 52 53 #include "tgsi/tgsi_exec.h" 54 #include "tgsi/tgsi_dump.h" 55 56 #include "util/u_math.h" 57 #include "util/u_pointer.h" 58 #include "util/u_string.h" 59 #include "util/simple_list.h" 60 61 62 #define DEBUG_STORE 0 63 64 65 static void 66 draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); 67 68 69 struct draw_gs_llvm_iface { 70 struct lp_build_tgsi_gs_iface base; 71 72 struct draw_gs_llvm_variant *variant; 73 LLVMValueRef input; 74 }; 75 76 static inline const struct draw_gs_llvm_iface * 77 draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface) 78 { 79 return (const struct draw_gs_llvm_iface *)iface; 80 } 81 82 /** 83 * Create LLVM type for draw_vertex_buffer. 
84 */ 85 static LLVMTypeRef 86 create_jit_dvbuffer_type(struct gallivm_state *gallivm, 87 const char *struct_name) 88 { 89 LLVMTargetDataRef target = gallivm->target; 90 LLVMTypeRef dvbuffer_type; 91 LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS]; 92 LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); 93 94 elem_types[DRAW_JIT_DVBUFFER_MAP] = 95 LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); 96 elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type; 97 98 dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types, 99 ARRAY_SIZE(elem_types), 0); 100 101 (void) target; /* silence unused var warning for non-debug build */ 102 LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map, 103 target, dvbuffer_type, 104 DRAW_JIT_DVBUFFER_MAP); 105 LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size, 106 target, dvbuffer_type, 107 DRAW_JIT_DVBUFFER_SIZE); 108 109 return dvbuffer_type; 110 } 111 112 /** 113 * Create LLVM type for struct draw_jit_texture 114 */ 115 static LLVMTypeRef 116 create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) 117 { 118 LLVMTargetDataRef target = gallivm->target; 119 LLVMTypeRef texture_type; 120 LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; 121 LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); 122 123 elem_types[DRAW_JIT_TEXTURE_WIDTH] = 124 elem_types[DRAW_JIT_TEXTURE_HEIGHT] = 125 elem_types[DRAW_JIT_TEXTURE_DEPTH] = 126 elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] = 127 elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type; 128 elem_types[DRAW_JIT_TEXTURE_BASE] = 129 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 130 elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = 131 elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = 132 elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] = 133 LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS); 134 135 texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, 136 ARRAY_SIZE(elem_types), 0); 137 138 (void) target; /* silence 
unused var warning for non-debug build */ 139 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, 140 target, texture_type, 141 DRAW_JIT_TEXTURE_WIDTH); 142 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, 143 target, texture_type, 144 DRAW_JIT_TEXTURE_HEIGHT); 145 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, 146 target, texture_type, 147 DRAW_JIT_TEXTURE_DEPTH); 148 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level, 149 target, texture_type, 150 DRAW_JIT_TEXTURE_FIRST_LEVEL); 151 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, 152 target, texture_type, 153 DRAW_JIT_TEXTURE_LAST_LEVEL); 154 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base, 155 target, texture_type, 156 DRAW_JIT_TEXTURE_BASE); 157 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, 158 target, texture_type, 159 DRAW_JIT_TEXTURE_ROW_STRIDE); 160 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, 161 target, texture_type, 162 DRAW_JIT_TEXTURE_IMG_STRIDE); 163 LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets, 164 target, texture_type, 165 DRAW_JIT_TEXTURE_MIP_OFFSETS); 166 167 LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type); 168 169 return texture_type; 170 } 171 172 173 /** 174 * Create LLVM type for struct draw_jit_sampler 175 */ 176 static LLVMTypeRef 177 create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name) 178 { 179 LLVMTargetDataRef target = gallivm->target; 180 LLVMTypeRef sampler_type; 181 LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS]; 182 183 elem_types[DRAW_JIT_SAMPLER_MIN_LOD] = 184 elem_types[DRAW_JIT_SAMPLER_MAX_LOD] = 185 elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context); 186 elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] = 187 LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); 188 189 sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types, 190 ARRAY_SIZE(elem_types), 0); 191 192 (void) target; /* silence unused var 
warning for non-debug build */ 193 LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod, 194 target, sampler_type, 195 DRAW_JIT_SAMPLER_MIN_LOD); 196 LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod, 197 target, sampler_type, 198 DRAW_JIT_SAMPLER_MAX_LOD); 199 LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias, 200 target, sampler_type, 201 DRAW_JIT_SAMPLER_LOD_BIAS); 202 LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color, 203 target, sampler_type, 204 DRAW_JIT_SAMPLER_BORDER_COLOR); 205 206 LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type); 207 208 return sampler_type; 209 } 210 211 212 /** 213 * Create LLVM type for struct draw_jit_context 214 */ 215 static LLVMTypeRef 216 create_jit_context_type(struct gallivm_state *gallivm, 217 LLVMTypeRef texture_type, LLVMTypeRef sampler_type, 218 const char *struct_name) 219 { 220 LLVMTargetDataRef target = gallivm->target; 221 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 222 LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context); 223 LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS]; 224 LLVMTypeRef context_type; 225 226 elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */ 227 LP_MAX_TGSI_CONST_BUFFERS); 228 elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */ 229 LP_MAX_TGSI_CONST_BUFFERS); 230 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 231 DRAW_TOTAL_CLIP_PLANES), 0); 232 elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */ 233 elem_types[4] = LLVMArrayType(texture_type, 234 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ 235 elem_types[5] = LLVMArrayType(sampler_type, 236 PIPE_MAX_SAMPLERS); /* samplers */ 237 context_type = LLVMStructTypeInContext(gallivm->context, elem_types, 238 ARRAY_SIZE(elem_types), 0); 239 240 (void) target; /* silence unused var warning for non-debug build */ 241 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, 242 target, 
context_type, DRAW_JIT_CTX_CONSTANTS); 243 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants, 244 target, context_type, DRAW_JIT_CTX_NUM_CONSTANTS); 245 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, 246 target, context_type, DRAW_JIT_CTX_PLANES); 247 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports, 248 target, context_type, DRAW_JIT_CTX_VIEWPORT); 249 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, 250 target, context_type, 251 DRAW_JIT_CTX_TEXTURES); 252 LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers, 253 target, context_type, 254 DRAW_JIT_CTX_SAMPLERS); 255 LP_CHECK_STRUCT_SIZE(struct draw_jit_context, 256 target, context_type); 257 258 return context_type; 259 } 260 261 262 /** 263 * Create LLVM type for struct draw_gs_jit_context 264 */ 265 static LLVMTypeRef 266 create_gs_jit_context_type(struct gallivm_state *gallivm, 267 unsigned vector_length, 268 LLVMTypeRef texture_type, LLVMTypeRef sampler_type, 269 const char *struct_name) 270 { 271 LLVMTargetDataRef target = gallivm->target; 272 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 273 LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context); 274 LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS]; 275 LLVMTypeRef context_type; 276 277 elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */ 278 LP_MAX_TGSI_CONST_BUFFERS); 279 elem_types[1] = LLVMArrayType(int_type, /* num_constants */ 280 LP_MAX_TGSI_CONST_BUFFERS); 281 elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), 282 DRAW_TOTAL_CLIP_PLANES), 0); 283 elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */ 284 285 elem_types[4] = LLVMArrayType(texture_type, 286 PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ 287 elem_types[5] = LLVMArrayType(sampler_type, 288 PIPE_MAX_SAMPLERS); /* samplers */ 289 290 elem_types[6] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); 291 elem_types[7] = 
LLVMPointerType(LLVMVectorType(int_type, 292 vector_length), 0); 293 elem_types[8] = LLVMPointerType(LLVMVectorType(int_type, 294 vector_length), 0); 295 296 context_type = LLVMStructTypeInContext(gallivm->context, elem_types, 297 ARRAY_SIZE(elem_types), 0); 298 299 (void) target; /* silence unused var warning for non-debug build */ 300 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants, 301 target, context_type, DRAW_GS_JIT_CTX_CONSTANTS); 302 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants, 303 target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS); 304 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes, 305 target, context_type, DRAW_GS_JIT_CTX_PLANES); 306 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports, 307 target, context_type, DRAW_GS_JIT_CTX_VIEWPORT); 308 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures, 309 target, context_type, 310 DRAW_GS_JIT_CTX_TEXTURES); 311 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers, 312 target, context_type, 313 DRAW_GS_JIT_CTX_SAMPLERS); 314 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths, 315 target, context_type, 316 DRAW_GS_JIT_CTX_PRIM_LENGTHS); 317 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices, 318 target, context_type, 319 DRAW_GS_JIT_CTX_EMITTED_VERTICES); 320 LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims, 321 target, context_type, 322 DRAW_GS_JIT_CTX_EMITTED_PRIMS); 323 LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context, 324 target, context_type); 325 326 return context_type; 327 } 328 329 330 static LLVMTypeRef 331 create_gs_jit_input_type(struct gallivm_state *gallivm) 332 { 333 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 334 LLVMTypeRef input_array; 335 336 input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */ 337 input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */ 338 input_array = LLVMArrayType(input_array, 
PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */ 339 input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */ 340 341 return input_array; 342 } 343 344 /** 345 * Create LLVM type for struct pipe_vertex_buffer 346 */ 347 static LLVMTypeRef 348 create_jit_vertex_buffer_type(struct gallivm_state *gallivm, 349 const char *struct_name) 350 { 351 LLVMTargetDataRef target = gallivm->target; 352 LLVMTypeRef elem_types[4]; 353 LLVMTypeRef vb_type; 354 355 elem_types[0] = 356 elem_types[1] = LLVMInt32TypeInContext(gallivm->context); 357 elem_types[2] = 358 elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 359 360 vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, 361 ARRAY_SIZE(elem_types), 0); 362 363 (void) target; /* silence unused var warning for non-debug build */ 364 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, 365 target, vb_type, 0); 366 LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, 367 target, vb_type, 1); 368 369 LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); 370 371 return vb_type; 372 } 373 374 375 /** 376 * Create LLVM type for struct vertex_header; 377 */ 378 static LLVMTypeRef 379 create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) 380 { 381 LLVMTargetDataRef target = gallivm->target; 382 LLVMTypeRef elem_types[3]; 383 LLVMTypeRef vertex_header; 384 char struct_name[24]; 385 386 util_snprintf(struct_name, 23, "vertex_header%d", data_elems); 387 388 elem_types[DRAW_JIT_VERTEX_VERTEX_ID] = LLVMIntTypeInContext(gallivm->context, 32); 389 elem_types[DRAW_JIT_VERTEX_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); 390 elem_types[DRAW_JIT_VERTEX_DATA] = LLVMArrayType(elem_types[1], data_elems); 391 392 vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, 393 ARRAY_SIZE(elem_types), 0); 394 395 /* these are bit-fields and we can't take address of them 396 LP_CHECK_MEMBER_OFFSET(struct 
vertex_header, clipmask, 397 target, vertex_header, 398 DRAW_JIT_VERTEX_CLIPMASK); 399 LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, 400 target, vertex_header, 401 DRAW_JIT_VERTEX_EDGEFLAG); 402 LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, 403 target, vertex_header, 404 DRAW_JIT_VERTEX_PAD); 405 LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, 406 target, vertex_header, 407 DRAW_JIT_VERTEX_VERTEX_ID); 408 */ 409 (void) target; /* silence unused var warning for non-debug build */ 410 LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos, 411 target, vertex_header, 412 DRAW_JIT_VERTEX_CLIP_POS); 413 LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, 414 target, vertex_header, 415 DRAW_JIT_VERTEX_DATA); 416 417 assert(LLVMABISizeOfType(target, vertex_header) == 418 offsetof(struct vertex_header, data[data_elems])); 419 420 return vertex_header; 421 } 422 423 424 /** 425 * Create LLVM types for various structures. 426 */ 427 static void 428 create_jit_types(struct draw_llvm_variant *variant) 429 { 430 struct gallivm_state *gallivm = variant->gallivm; 431 LLVMTypeRef texture_type, sampler_type, context_type, buffer_type, 432 vb_type; 433 434 texture_type = create_jit_texture_type(gallivm, "texture"); 435 sampler_type = create_jit_sampler_type(gallivm, "sampler"); 436 437 context_type = create_jit_context_type(gallivm, texture_type, sampler_type, 438 "draw_jit_context"); 439 variant->context_ptr_type = LLVMPointerType(context_type, 0); 440 441 buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer"); 442 variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0); 443 444 vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); 445 variant->vb_ptr_type = LLVMPointerType(vb_type, 0); 446 } 447 448 449 static LLVMTypeRef 450 get_context_ptr_type(struct draw_llvm_variant *variant) 451 { 452 if (!variant->context_ptr_type) 453 create_jit_types(variant); 454 return variant->context_ptr_type; 455 } 456 457 458 static LLVMTypeRef 
459 get_buffer_ptr_type(struct draw_llvm_variant *variant) 460 { 461 if (!variant->buffer_ptr_type) 462 create_jit_types(variant); 463 return variant->buffer_ptr_type; 464 } 465 466 467 static LLVMTypeRef 468 get_vb_ptr_type(struct draw_llvm_variant *variant) 469 { 470 if (!variant->vb_ptr_type) 471 create_jit_types(variant); 472 return variant->vb_ptr_type; 473 } 474 475 static LLVMTypeRef 476 get_vertex_header_ptr_type(struct draw_llvm_variant *variant) 477 { 478 if (!variant->vertex_header_ptr_type) 479 create_jit_types(variant); 480 return variant->vertex_header_ptr_type; 481 } 482 483 484 /** 485 * Create per-context LLVM info. 486 */ 487 struct draw_llvm * 488 draw_llvm_create(struct draw_context *draw, LLVMContextRef context) 489 { 490 struct draw_llvm *llvm; 491 492 if (!lp_build_init()) 493 return NULL; 494 495 llvm = CALLOC_STRUCT( draw_llvm ); 496 if (!llvm) 497 return NULL; 498 499 llvm->draw = draw; 500 501 llvm->context = context; 502 if (!llvm->context) { 503 llvm->context = LLVMContextCreate(); 504 llvm->context_owned = true; 505 } 506 if (!llvm->context) 507 goto fail; 508 509 llvm->nr_variants = 0; 510 make_empty_list(&llvm->vs_variants_list); 511 512 llvm->nr_gs_variants = 0; 513 make_empty_list(&llvm->gs_variants_list); 514 515 return llvm; 516 517 fail: 518 draw_llvm_destroy(llvm); 519 return NULL; 520 } 521 522 523 /** 524 * Free per-context LLVM info. 525 */ 526 void 527 draw_llvm_destroy(struct draw_llvm *llvm) 528 { 529 if (llvm->context_owned) 530 LLVMContextDispose(llvm->context); 531 llvm->context = NULL; 532 533 /* XXX free other draw_llvm data? */ 534 FREE(llvm); 535 } 536 537 538 /** 539 * Create LLVM-generated code for a vertex shader. 
540 */ 541 struct draw_llvm_variant * 542 draw_llvm_create_variant(struct draw_llvm *llvm, 543 unsigned num_inputs, 544 const struct draw_llvm_variant_key *key) 545 { 546 struct draw_llvm_variant *variant; 547 struct llvm_vertex_shader *shader = 548 llvm_vertex_shader(llvm->draw->vs.vertex_shader); 549 LLVMTypeRef vertex_header; 550 char module_name[64]; 551 552 variant = MALLOC(sizeof *variant + 553 shader->variant_key_size - 554 sizeof variant->key); 555 if (!variant) 556 return NULL; 557 558 variant->llvm = llvm; 559 variant->shader = shader; 560 561 util_snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u", 562 variant->shader->variants_cached); 563 564 variant->gallivm = gallivm_create(module_name, llvm->context); 565 566 create_jit_types(variant); 567 568 memcpy(&variant->key, key, shader->variant_key_size); 569 570 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { 571 tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0); 572 draw_llvm_dump_variant_key(&variant->key); 573 } 574 575 vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs); 576 577 variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); 578 579 draw_llvm_generate(llvm, variant); 580 581 gallivm_compile_module(variant->gallivm); 582 583 variant->jit_func = (draw_jit_vert_func) 584 gallivm_jit_function(variant->gallivm, variant->function); 585 586 gallivm_free_ir(variant->gallivm); 587 588 variant->list_item_global.base = variant; 589 variant->list_item_local.base = variant; 590 /*variant->no = */shader->variants_created++; 591 variant->list_item_global.base = variant; 592 593 return variant; 594 } 595 596 597 static void 598 generate_vs(struct draw_llvm_variant *variant, 599 LLVMBuilderRef builder, 600 struct lp_type vs_type, 601 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], 602 const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], 603 const struct lp_bld_tgsi_system_values *system_values, 604 LLVMValueRef context_ptr, 605 struct 
lp_build_sampler_soa *draw_sampler, 606 boolean clamp_vertex_color) 607 { 608 struct draw_llvm *llvm = variant->llvm; 609 const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; 610 LLVMValueRef consts_ptr = 611 draw_jit_context_vs_constants(variant->gallivm, context_ptr); 612 LLVMValueRef num_consts_ptr = 613 draw_jit_context_num_vs_constants(variant->gallivm, context_ptr); 614 615 lp_build_tgsi_soa(variant->gallivm, 616 tokens, 617 vs_type, 618 NULL /*struct lp_build_mask_context *mask*/, 619 consts_ptr, 620 num_consts_ptr, 621 system_values, 622 inputs, 623 outputs, 624 context_ptr, 625 NULL, 626 draw_sampler, 627 &llvm->draw->vs.vertex_shader->info, 628 NULL); 629 630 { 631 LLVMValueRef out; 632 unsigned chan, attrib; 633 struct lp_build_context bld; 634 struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info; 635 lp_build_context_init(&bld, variant->gallivm, vs_type); 636 637 for (attrib = 0; attrib < info->num_outputs; ++attrib) { 638 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 639 if (outputs[attrib][chan]) { 640 switch (info->output_semantic_name[attrib]) { 641 case TGSI_SEMANTIC_COLOR: 642 case TGSI_SEMANTIC_BCOLOR: 643 if (clamp_vertex_color) { 644 out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); 645 out = lp_build_clamp(&bld, out, bld.zero, bld.one); 646 LLVMBuildStore(builder, out, outputs[attrib][chan]); 647 } 648 break; 649 } 650 } 651 } 652 } 653 } 654 } 655 656 657 static void 658 fetch_instanced(struct gallivm_state *gallivm, 659 const struct util_format_description *format_desc, 660 struct lp_type vs_type, 661 LLVMValueRef vb_stride, 662 LLVMValueRef map_ptr, 663 LLVMValueRef buffer_size_adj, 664 LLVMValueRef *inputs, 665 LLVMValueRef index) 666 { 667 LLVMTypeRef i32_t = LLVMInt32TypeInContext(gallivm->context); 668 LLVMTypeRef aosf_t, aosi_t; 669 LLVMValueRef zero = LLVMConstNull(i32_t); 670 LLVMBuilderRef builder = gallivm->builder; 671 LLVMValueRef stride, buffer_overflowed, aos, index_valid; 672 
unsigned i; 673 674 aosf_t = lp_build_vec_type(gallivm, lp_float32_vec4_type()); 675 aosi_t = lp_build_vec_type(gallivm, lp_int32_vec4_type()); 676 677 /* This mul can overflow. Wraparound is ok. */ 678 stride = LLVMBuildMul(builder, vb_stride, index, ""); 679 680 buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGE, 681 stride, buffer_size_adj, 682 "buffer_overflowed"); 683 684 if (0) { 685 lp_build_print_value(gallivm, " instance index = ", index); 686 lp_build_print_value(gallivm, " buffer overflowed = ", buffer_overflowed); 687 } 688 689 index_valid = LLVMBuildNot(builder, buffer_overflowed, ""); 690 index_valid = LLVMBuildSExt(builder, index_valid, i32_t, ""); 691 stride = LLVMBuildAnd(builder, stride, index_valid, ""); 692 693 aos = lp_build_fetch_rgba_aos(gallivm, 694 format_desc, 695 lp_float32_vec4_type(), 696 FALSE, 697 map_ptr, 698 stride, zero, zero, 699 NULL); 700 701 index_valid = lp_build_broadcast(gallivm, aosi_t, index_valid); 702 aos = LLVMBuildBitCast(builder, aos, aosi_t, ""); 703 aos = LLVMBuildAnd(builder, aos, index_valid, ""); 704 aos = LLVMBuildBitCast(builder, aos, aosf_t, ""); 705 706 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 707 LLVMValueRef index = lp_build_const_int32(gallivm, i); 708 inputs[i] = lp_build_extract_broadcast(gallivm, 709 lp_float32_vec4_type(), 710 vs_type, aos, index); 711 } 712 } 713 714 715 static void 716 fetch_vector(struct gallivm_state *gallivm, 717 const struct util_format_description *format_desc, 718 struct lp_type vs_type, 719 LLVMValueRef vb_stride, 720 LLVMValueRef map_ptr, 721 LLVMValueRef buffer_size_adj, 722 LLVMValueRef *inputs, 723 LLVMValueRef indices) 724 { 725 LLVMBuilderRef builder = gallivm->builder; 726 struct lp_build_context blduivec; 727 struct lp_type fetch_type = vs_type; 728 LLVMValueRef offset, valid_mask; 729 unsigned i; 730 731 lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); 732 733 vb_stride = lp_build_broadcast_scalar(&blduivec, vb_stride); 734 buffer_size_adj = 
lp_build_broadcast_scalar(&blduivec, buffer_size_adj); 735 736 /* This mul can overflow. Wraparound is ok. */ 737 offset = lp_build_mul(&blduivec, vb_stride, indices); 738 739 valid_mask = lp_build_compare(gallivm, blduivec.type, 740 PIPE_FUNC_LESS, offset, buffer_size_adj); 741 742 /* not valid elements use offset 0 */ 743 offset = LLVMBuildAnd(builder, offset, valid_mask, ""); 744 745 if (0) { 746 lp_build_print_value(gallivm, " indices = ", indices); 747 lp_build_print_value(gallivm, " offsets = ", offset); 748 lp_build_print_value(gallivm, " valid_mask = ", valid_mask); 749 } 750 751 /* 752 * Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. 753 * This should always produce better code. 754 */ 755 756 /* The type handling is annoying here... */ 757 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && 758 format_desc->channel[0].pure_integer) { 759 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { 760 fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); 761 } 762 else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { 763 fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); 764 } 765 } 766 767 lp_build_fetch_rgba_soa(gallivm, format_desc, 768 fetch_type, FALSE, map_ptr, offset, 769 blduivec.zero, blduivec.zero, 770 NULL, inputs); 771 772 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 773 inputs[i] = LLVMBuildBitCast(builder, inputs[i], 774 lp_build_vec_type(gallivm, vs_type), ""); 775 } 776 777 /* out-of-bound fetches return all zeros */ 778 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 779 inputs[i] = LLVMBuildBitCast(builder, inputs[i], blduivec.vec_type, ""); 780 inputs[i] = LLVMBuildAnd(builder, inputs[i], valid_mask, ""); 781 inputs[i] = LLVMBuildBitCast(builder, inputs[i], 782 lp_build_vec_type(gallivm, vs_type), ""); 783 } 784 } 785 786 787 static void 788 store_aos(struct gallivm_state *gallivm, 789 LLVMValueRef io_ptr, 790 LLVMValueRef index, 791 LLVMValueRef 
value) 792 { 793 LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0); 794 LLVMBuilderRef builder = gallivm->builder; 795 LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr); 796 LLVMValueRef indices[3]; 797 798 indices[0] = lp_build_const_int32(gallivm, 0); 799 indices[1] = index; 800 indices[2] = lp_build_const_int32(gallivm, 0); 801 802 data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); 803 data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, ""); 804 805 #if DEBUG_STORE 806 lp_build_printf(gallivm, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); 807 #endif 808 809 /* Unaligned store due to the vertex header */ 810 LLVMSetAlignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float)); 811 } 812 813 /** 814 * Adjust the mask to architecture endianess. The mask will the store in struct: 815 * 816 * struct vertex_header { 817 * unsigned clipmask:DRAW_TOTAL_CLIP_PLANES; 818 * unsigned edgeflag:1; 819 * unsigned pad:1; 820 * unsigned vertex_id:16; 821 * [...] 
822 * } 823 * 824 * On little-endian machine nothing needs to done, however on bit-endian machine 825 * the mask's fields need to be adjusted with the algorithm: 826 * 827 * uint32_t reverse (uint32_t x) 828 * { 829 * return (x >> 16) | // vertex_id 830 * ((x & 0x3fff) << 18) | // clipmask 831 * ((x & 0x4000) << 3) | // pad 832 * ((x & 0x8000) << 1); // edgeflag 833 * } 834 */ 835 static LLVMValueRef 836 adjust_mask(struct gallivm_state *gallivm, 837 LLVMValueRef mask) 838 { 839 #ifdef PIPE_ARCH_BIG_ENDIAN 840 LLVMBuilderRef builder = gallivm->builder; 841 LLVMValueRef vertex_id; 842 LLVMValueRef clipmask; 843 LLVMValueRef pad; 844 LLVMValueRef edgeflag; 845 846 vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), ""); 847 clipmask = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), ""); 848 clipmask = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), ""); 849 if (0) { 850 pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), ""); 851 pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 3), ""); 852 } 853 edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), ""); 854 edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), ""); 855 856 mask = LLVMBuildOr(builder, vertex_id, clipmask, ""); 857 if (0) { 858 mask = LLVMBuildOr(builder, mask, pad, ""); 859 } 860 mask = LLVMBuildOr(builder, mask, edgeflag, ""); 861 #endif 862 return mask; 863 } 864 865 static void 866 store_aos_array(struct gallivm_state *gallivm, 867 struct lp_type soa_type, 868 LLVMValueRef io_ptr, 869 LLVMValueRef *indices, 870 LLVMValueRef* aos, 871 int attrib, 872 int num_outputs, 873 LLVMValueRef clipmask, 874 boolean need_edgeflag) 875 { 876 LLVMBuilderRef builder = gallivm->builder; 877 LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib); 878 LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32]; 879 LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32]; 880 
LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; 881 int vector_length = soa_type.length; 882 int i; 883 884 debug_assert(TGSI_NUM_CHANNELS == 4); 885 886 for (i = 0; i < vector_length; i++) { 887 linear_inds[i] = lp_build_const_int32(gallivm, i); 888 if (indices) { 889 inds[i] = indices[i]; 890 } else { 891 inds[i] = linear_inds[i]; 892 } 893 io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); 894 } 895 896 if (attrib == 0) { 897 /* store vertex header for each of the n vertices */ 898 LLVMValueRef val, cliptmp; 899 int vertex_id_pad_edgeflag; 900 901 /* If this assertion fails, it means we need to update the bit twidding 902 * code here. See struct vertex_header in draw_private.h. 903 */ 904 assert(DRAW_TOTAL_CLIP_PLANES==14); 905 /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */ 906 if (!need_edgeflag) { 907 vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES); 908 } 909 else { 910 vertex_id_pad_edgeflag = (0xffff << 16); 911 } 912 val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), 913 vertex_id_pad_edgeflag); 914 /* OR with the clipmask */ 915 cliptmp = LLVMBuildOr(builder, val, clipmask, ""); 916 for (i = 0; i < vector_length; i++) { 917 LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]); 918 val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], ""); 919 val = adjust_mask(gallivm, val); 920 #if DEBUG_STORE 921 lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n", 922 io_ptrs[i], inds[i], val); 923 #endif 924 LLVMBuildStore(builder, val, id_ptr); 925 } 926 } 927 928 /* store for each of the n vertices */ 929 for (i = 0; i < vector_length; i++) { 930 store_aos(gallivm, io_ptrs[i], attr_index, aos[i]); 931 } 932 } 933 934 935 static void 936 convert_to_aos(struct gallivm_state *gallivm, 937 LLVMValueRef io, 938 LLVMValueRef *indices, 939 LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], 940 LLVMValueRef clipmask, 941 int num_outputs, 942 struct lp_type soa_type, 943 boolean 
need_edgeflag)
{
   LLVMBuilderRef builder = gallivm->builder;
   unsigned chan, attrib, i;

#if DEBUG_STORE
   lp_build_printf(gallivm, " # storing begin\n");
#endif
   for (attrib = 0; attrib < num_outputs; ++attrib) {
      LLVMValueRef soa[TGSI_NUM_CHANNELS];
      LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
      for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
         if (outputs[attrib][chan]) {
            LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
            lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
#if DEBUG_STORE
            lp_build_printf(gallivm, "output %d : %d ",
                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                         attrib, 0),
                            LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
                                         chan, 0));
            lp_build_print_value(gallivm, "val = ", out);
            {
               LLVMValueRef iv =
                  LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");

               lp_build_print_value(gallivm, " ival = ", iv);
            }
#endif
            soa[chan] = out;
         }
         else {
            soa[chan] = 0;
         }
      }


      if (soa_type.length == TGSI_NUM_CHANNELS) {
         lp_build_transpose_aos(gallivm, soa_type, soa, aos);
      } else {
         lp_build_transpose_aos(gallivm, soa_type, soa, soa);

         for (i = 0; i < soa_type.length; ++i) {
            aos[i] = lp_build_extract_range(gallivm,
                                            soa[i % TGSI_NUM_CHANNELS],
                                            (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
                                            TGSI_NUM_CHANNELS);
         }
      }

      store_aos_array(gallivm,
                      soa_type,
                      io, indices,
                      aos,
                      attrib,
                      num_outputs,
                      clipmask,
                      need_edgeflag);
   }
#if DEBUG_STORE
   lp_build_printf(gallivm, " # storing end\n");
#endif
}


/**
 * Stores original vertex positions in clip coordinates.
 *
 * Emits IR that loads the four SoA channel vectors of output slot \p idx,
 * transposes them to AoS, and writes one 4 x float vector per vertex into
 * the location returned by draw_jit_header_clip_pos() for that vertex.
 *
 * \param gallivm  code generation state; its builder receives the IR
 * \param vs_type  vector type of the shader outputs; vs_type.length
 *                 vertices are stored
 * \param io_ptr   pointer to the first vertex header of this batch
 * \param outputs  per-output, per-channel pointers to the SoA values
 * \param idx      index into \p outputs of the slot to store
 */
static void
store_clip(struct gallivm_state *gallivm,
           const struct lp_type vs_type,
           LLVMValueRef io_ptr,
           LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
           int idx)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef soa[4];
   LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef indices[2];
   LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
   LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
   LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
   /* the destination is written as a pointer to a 4 x float vector */
   LLVMTypeRef clip_ptr_type =
      LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
                                     4), 0);
   int i, j;

   /* both GEP indices used for the final address are constant zero */
   indices[0] =
   indices[1] = lp_build_const_int32(gallivm, 0);

   /* one header pointer per vertex in the vector: io_ptr + i */
   for (i = 0; i < vs_type.length; i++) {
      inds[i] = lp_build_const_int32(gallivm, i);
      io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
   }

   soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
   soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
   soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
   soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/

   for (i = 0; i < vs_type.length; i++) {
      clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
   }

   /*
    * Transpose in place, then pick the 4-wide slice belonging to vertex i
    * out of the transposed vectors.
    */
   lp_build_transpose_aos(gallivm, vs_type, soa, soa);
   for (i = 0; i < vs_type.length; ++i) {
      aos[i] = lp_build_extract_range(gallivm,
                                      soa[i % TGSI_NUM_CHANNELS],
                                      (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
                                      TGSI_NUM_CHANNELS);
   }

   for (j = 0; j < vs_type.length; j++) {
      LLVMValueRef clip_ptr;

      clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
      clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");

      /* Unaligned store: only float alignment is declared for the vector */
      LLVMSetAlignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
   }
}


/**
 * Transforms the outputs for viewport mapping.
 *
 * Emits IR that replaces w of the position output with 1/w and rewrites
 * x, y, z as (component * 1/w) * scale + translate, storing the results
 * back into outputs[pos].
 *
 * \param variant      vertex shader variant; supplies gallivm and the
 *                     position output slot
 * \param builder      IR builder the instructions are appended to
 * \param vs_type      float vector type of the outputs
 * \param outputs      per-output, per-channel pointers to the SoA values
 * \param context_ptr  jit context; the viewport data is read through it
 */
static void
generate_viewport(struct draw_llvm_variant *variant,
                  LLVMBuilderRef builder,
                  struct lp_type vs_type,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr)
{
   int i;
   struct gallivm_state *gallivm = variant->gallivm;
   struct lp_type f32_type = vs_type;
   const unsigned pos = variant->llvm->draw->vs.position_output;
   LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
   LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
   LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/
   LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);

   /*
    * We treat pipe_viewport_state as a float array.
    * The offsets are relative to vp_ptr itself, so the values read below
    * come from the first pipe_viewport_state it points to.
    */
   const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
   const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);

   /* for 1/w convention*/
   out3 = LLVMBuildFDiv(builder, const1, out3, "");
   LLVMBuildStore(builder, out3, outputs[pos][3]);

   /* Viewport Mapping: x, y and z only; w was handled above */
   for (i=0; i<3; i++) {
      LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
      LLVMValueRef scale;
      LLVMValueRef trans;
      LLVMValueRef scale_i;
      LLVMValueRef trans_i;
      LLVMValueRef index;

      index = lp_build_const_int32(gallivm, i + scale_index_offset);
      scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");

      index = lp_build_const_int32(gallivm, i + trans_index_offset);
      trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");

      /* the scalar scale/translate values are splatted across the vector */
      scale = lp_build_broadcast(gallivm, vs_type_llvm,
                                 LLVMBuildLoad(builder, scale_i, "scale"));
      trans = lp_build_broadcast(gallivm, vs_type_llvm,
                                 LLVMBuildLoad(builder, trans_i, "trans"));

      /* divide by w (out3 already holds 1/w) */
      out = LLVMBuildFMul(builder, out, out3, "");
      /* mult by scale, add translation */
      out = lp_build_fmuladd(builder, out, scale, trans);

      /* store transformed outputs */
      LLVMBuildStore(builder, out, outputs[pos][i]);
   }

}


/**
 * Returns clipmask as nxi32 bitmask for the n vertices.
 *
 * Bits set by the code below, per vertex:
 *  - key->clip_xy:        values 1, 2, 4, 8 (planes 1..4)
 *  - key->clip_z:         values 16, 32 (planes 5, 6)
 *  - user planes / clip distances: 1 << (6 + n) for each enabled plane n
 *  - key->need_edgeflags: 1 << DRAW_TOTAL_CLIP_PLANES when the edgeflag
 *                         output equals 1.0
 *
 * \param llvm           draw llvm state; supplies the output slot indices
 * \param gallivm        code generation state
 * \param vs_type        float vector type of the outputs
 * \param outputs        per-output, per-channel pointers to the SoA values
 * \param key            variant key selecting which tests are generated
 * \param context_ptr    jit context; user clip planes are read through it
 * \param have_clipdist  set to TRUE when a clip distance output is tested;
 *                       it is never cleared here
 */
static LLVMValueRef
generate_clipmask(struct draw_llvm *llvm,
                  struct gallivm_state *gallivm,
                  struct lp_type vs_type,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  struct draw_llvm_variant_key *key,
                  LLVMValueRef context_ptr,
                  boolean *have_clipdist)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef mask; /* stores the <nxi32> clipmasks */
   LLVMValueRef test, temp;
   LLVMValueRef zero, shift;
   LLVMValueRef pos_x, pos_y, pos_z, pos_w;
   LLVMValueRef cv_x, cv_y, cv_z, cv_w;
   LLVMValueRef plane1, planes, plane_ptr, sum;
   struct lp_type f32_type = vs_type;
   struct lp_type i32_type = lp_int_type(vs_type);
   const unsigned pos = llvm->draw->vs.position_output;
   const unsigned cv = llvm->draw->vs.clipvertex_output;
   int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
   boolean have_cd = false;
   boolean clip_user = key->clip_user;
   unsigned ucp_enable = key->ucp_enable;
   unsigned cd[2];

   cd[0] = llvm->draw->vs.ccdistance_output[0];
   cd[1] = llvm->draw->vs.ccdistance_output[1];

   /*
    * Clip distance outputs count as present when either slot differs
    * from the position output slot.
    */
   if (cd[0] != pos || cd[1] != pos)
      have_cd = true;

   /*
    * The shader writes clip distances but user clipping is not enabled in
    * the key: enable it locally for the lowest num_written_clipdistance
    * planes.
    */
   if (num_written_clipdistance && !clip_user) {
      clip_user = true;
      ucp_enable = (1 << num_written_clipdistance) - 1;
   }

   mask = lp_build_const_int_vec(gallivm, i32_type, 0);
   temp = lp_build_const_int_vec(gallivm, i32_type, 0);
   zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
   shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */

   /*
    * load clipvertex and position from correct locations.
    * if they are the same just load them once.
    */
   pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
   pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
   pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
   pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */

   if (clip_user && cv != pos) {
      cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
      cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
      cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
      cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
   } else {
      cv_x = pos_x;
      cv_y = pos_y;
      cv_z = pos_z;
      cv_w = pos_w;
   }

   /*
    * Be careful with the comparisons and NaNs (using llvm's unordered
    * comparisons here).
    */
   /* Cliptest, for hardwired planes */
   /*
    * XXX should take guardband into account (currently not in key).
    * Otherwise might run the draw pipeline stages for nothing.
    */
   if (key->clip_xy) {
      /* plane 1: x > w, bit value 1 */
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
      temp = shift;
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = test;

      /* plane 2: 0 > x + w, bit value 2 (temp is shifted left by one) */
      test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
      temp = LLVMBuildShl(builder, temp, shift, "");
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = LLVMBuildOr(builder, mask, test, "");

      /* plane 3: y > w, bit value 4 */
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
      temp = LLVMBuildShl(builder, temp, shift, "");
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = LLVMBuildOr(builder, mask, test, "");

      /* plane 4: 0 > y + w, bit value 8 */
      test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
      temp = LLVMBuildShl(builder, temp, shift, "");
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = LLVMBuildOr(builder, mask, test, "");
   }

   if (key->clip_z) {
      /* restart from bit value 16 so this works with or without clip_xy */
      temp = lp_build_const_int_vec(gallivm, i32_type, 16);
      if (key->clip_halfz) {
         /* plane 5: 0 > z */
         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
         test = LLVMBuildAnd(builder, test, temp, "");
         mask = LLVMBuildOr(builder, mask, test, "");
      }
      else {
         /* plane 5: 0 > z + w */
         test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
         test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
         test = LLVMBuildAnd(builder, test, temp, "");
         mask = LLVMBuildOr(builder, mask, test, "");
      }
      /* plane 6: z > w, bit value 32 */
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
      temp = LLVMBuildShl(builder, temp, shift, "");
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = LLVMBuildOr(builder, mask, test, "");
   }

   if (clip_user) {
      LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
      LLVMValueRef indices[3];
      LLVMValueRef is_nan_or_inf;

      /* userclip planes: iterate over the set bits of ucp_enable */
      while (ucp_enable) {
         unsigned plane_idx = ffs(ucp_enable)-1;
         ucp_enable &= ~(1 << plane_idx);
         /* user planes follow the six hardwired planes in the mask */
         plane_idx += 6;

         if (have_cd && num_written_clipdistance) {
            LLVMValueRef clipdist;
            int i;
            i = plane_idx - 6;

            *have_clipdist = TRUE;
            /* distances 0..3 are in slot cd[0], 4 and up in slot cd[1] */
            if (i < 4) {
               clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
            } else {
               clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
            }
            /* flagged when 0 > distance, or the distance is inf/NaN */
            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
            is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
            test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
            temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
            test = LLVMBuildAnd(builder, test, temp, "");
            mask = LLVMBuildOr(builder, mask, test, "");
         } else {
            /*
             * Dot product of the plane equation with the clip vertex:
             * sum = plane.x*cv_x + plane.y*cv_y + plane.z*cv_z + plane.w*cv_w
             */
            LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
            indices[0] = lp_build_const_int32(gallivm, 0);
            indices[1] = lp_build_const_int32(gallivm, plane_idx);

            indices[2] = lp_build_const_int32(gallivm, 0);
            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
            sum = LLVMBuildFMul(builder, planes, cv_x, "");

            indices[2] = lp_build_const_int32(gallivm, 1);
            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
            sum = lp_build_fmuladd(builder, planes, cv_y, sum);

            indices[2] = lp_build_const_int32(gallivm, 2);
            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
            sum = lp_build_fmuladd(builder, planes, cv_z, sum);

            indices[2] = lp_build_const_int32(gallivm, 3);
            plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
            plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
            planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
            sum = lp_build_fmuladd(builder, planes, cv_w, sum);

            /* flagged when 0 > sum */
            test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
            temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
            test = LLVMBuildAnd(builder, test, temp, "");
            mask = LLVMBuildOr(builder, mask, test, "");
         }
      }
   }
   if (key->need_edgeflags) {
      /*
       * This isn't really part of clipmask but stored the same in vertex
       * header later, so do it here.
       */
      unsigned edge_attr = llvm->draw->vs.edgeflag_output;
      LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
      LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
      test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
      temp = lp_build_const_int_vec(gallivm, i32_type,
                                    1LL << DRAW_TOTAL_CLIP_PLANES);
      test = LLVMBuildAnd(builder, test, temp, "");
      mask = LLVMBuildOr(builder, mask, test, "");
   }
   return mask;
}


/**
 * Returns boolean if any clipping has occurred.
 * Uses a zero/one i8 value to represent the boolean.
 *
 * \param gallivm               code generation state
 * \param vs_type               vector type; vs_type.length lanes are reduced
 * \param clipmask_bool_ptr     pointer to the accumulated clipmask vector
 * \param edgeflag_in_clipmask  whether the edgeflag bit is stored in the
 *                              mask and has to be inverted first
 * \return i8 value, the zero-extended result of lp_build_any_true_range()
 */
static LLVMValueRef
clipmask_booli8(struct gallivm_state *gallivm,
                const struct lp_type vs_type,
                LLVMValueRef clipmask_bool_ptr,
                boolean edgeflag_in_clipmask)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
   LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
   LLVMValueRef ret;
   struct lp_build_context bldivec;

   lp_build_context_init(&bldivec, gallivm, lp_int_type(vs_type));

   /*
    * We need to invert the edgeflag bit from the clipmask here
    * (because the result is really if we want to run the pipeline or not
    * and we (may) need it if edgeflag was 0).
    */
   if (edgeflag_in_clipmask) {
      LLVMValueRef edge = lp_build_const_int_vec(gallivm, bldivec.type,
                                                 1LL << DRAW_TOTAL_CLIP_PLANES);
      clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
   }

   /*
    * XXX: probably should mask off bits from the mask which come from
    * vertices which were beyond the count (i.e. indices_valid for
    * linear fetches, for elts ones we don't have the correct mask
    * right now). Otherwise might run the pipeline for nothing,
    * though everything should still work.
    */
   ret = lp_build_any_true_range(&bldivec, vs_type.length, clipmask_bool);
   ret = LLVMBuildZExt(builder, ret, int8_type, "");
   return ret;
}

/**
 * Geometry shader interface callback: fetch one input channel.
 *
 * When neither index is indirect a single GEP + load on gs->input is
 * emitted. Otherwise the value is assembled lane by lane: for each lane
 * the per-lane vertex and/or attribute index is extracted, the addressed
 * channel vector is loaded, and that lane's element is inserted into the
 * result.
 *
 * \param gs_iface            interface struct, downcast to draw_gs_llvm_iface
 * \param bld_base            tgsi build context; supplies gallivm and type
 * \param is_vindex_indirect  vertex_index is a per-lane vector
 * \param vertex_index        vertex index (scalar, or vector if indirect)
 * \param is_aindex_indirect  attrib_index is a per-lane vector
 * \param attrib_index        attribute index (scalar, or vector if indirect)
 * \param swizzle_index       channel index within the attribute
 * \return the fetched channel vector
 */
static LLVMValueRef
draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
                         struct lp_build_tgsi_context * bld_base,
                         boolean is_vindex_indirect,
                         LLVMValueRef vertex_index,
                         boolean is_aindex_indirect,
                         LLVMValueRef attrib_index,
                         LLVMValueRef swizzle_index)
{
   const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef indices[3];
   LLVMValueRef res;
   struct lp_type type = bld_base->base.type;

   if (is_vindex_indirect || is_aindex_indirect) {
      int i;
      /* start from the zero vector and fill in one lane per iteration */
      res = bld_base->base.zero;
      for (i = 0; i < type.length; ++i) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, i);
         LLVMValueRef vert_chan_index = vertex_index;
         LLVMValueRef attr_chan_index = attrib_index;
         LLVMValueRef channel_vec, value;

         if (is_vindex_indirect) {
            vert_chan_index = LLVMBuildExtractElement(builder,
                                                      vertex_index, idx, "");
         }
         if (is_aindex_indirect) {
            attr_chan_index = LLVMBuildExtractElement(builder,
                                                      attrib_index, idx, "");
         }

         indices[0] = vert_chan_index;
         indices[1] = attr_chan_index;
         indices[2] = swizzle_index;

         /* load the whole channel vector, keep only this lane's element */
         channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
         channel_vec = LLVMBuildLoad(builder, channel_vec, "");
         value = LLVMBuildExtractElement(builder, channel_vec, idx, "");

         res = LLVMBuildInsertElement(builder, res, value, idx, "");
      }
   } else {
      indices[0] = vertex_index;
      indices[1] = attrib_index;
      indices[2] = swizzle_index;

      res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
      res = LLVMBuildLoad(builder, res, "");
   }

   return res;
}

/**
 * Geometry shader interface callback: store one emitted vertex per lane.
 *
 * The destination index for lane i is
 *    i * primitive_boundary + emitted_vertices_vec[i]
 * and the outputs are written through convert_to_aos() with an all-zero
 * clipmask and no edgeflag.
 *
 * \param gs_base               interface struct, downcast to draw_gs_llvm_iface
 * \param bld_base              tgsi build context; supplies the vector type
 * \param outputs               per-output, per-channel pointers to SoA values
 * \param emitted_vertices_vec  per-lane count of vertices emitted so far
 */
static void
draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
                         struct lp_build_tgsi_context * bld_base,
                         LLVMValueRef (*outputs)[4],
                         LLVMValueRef emitted_vertices_vec)
{
   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   struct draw_gs_llvm_variant *variant = gs_iface->variant;
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_type gs_type = bld_base->base.type;
   LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
                                                  lp_int_type(gs_type), 0);
   LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef next_prim_offset =
      lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
   LLVMValueRef io = variant->io_ptr;
   unsigned i;
   const struct tgsi_shader_info *gs_info = &variant->shader->base.info;

   for (i = 0; i < gs_type.length; ++i) {
      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
      LLVMValueRef currently_emitted =
         LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
      /* each lane owns a region of primitive_boundary vertex slots */
      indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
      indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
   }

   convert_to_aos(gallivm, io, indices,
                  outputs, clipmask,
                  gs_info->num_outputs, gs_type,
                  FALSE);
}

/**
 * Geometry shader interface callback: record the length of a finished
 * primitive.
 *
 * For each lane, loads the pointer stored at prim_lengths[prims_emitted]
 * and writes the lane's vertex count at offset <lane> from it.
 *
 * \param gs_base             interface struct, downcast to draw_gs_llvm_iface
 * \param bld_base            tgsi build context; supplies the vector length
 * \param verts_per_prim_vec  per-lane vertex count of the primitive
 * \param emitted_prims_vec   per-lane count of primitives emitted so far
 */
static void
draw_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
                           struct lp_build_tgsi_context * bld_base,
                           LLVMValueRef verts_per_prim_vec,
                           LLVMValueRef emitted_prims_vec)
{
   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   struct draw_gs_llvm_variant *variant = gs_iface->variant;
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef prim_lengts_ptr =
      draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
   unsigned i;

   for (i = 0; i < bld_base->base.type.length; ++i) {
      LLVMValueRef ind = lp_build_const_int32(gallivm, i);
      LLVMValueRef prims_emitted =
         LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
      LLVMValueRef store_ptr;
      LLVMValueRef num_vertices =
         LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");

      /* two-level lookup: first by primitive number, then by lane */
      store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
      store_ptr = LLVMBuildLoad(builder, store_ptr, "");
      store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
      LLVMBuildStore(builder, num_vertices, store_ptr);
   }
}

/**
 * Geometry shader interface callback: write the final per-lane totals.
 *
 * Stores the emitted-vertex and emitted-primitive count vectors through
 * the pointers obtained from the jit context.
 *
 * \param gs_base                     interface struct, downcast to
 *                                    draw_gs_llvm_iface
 * \param bld_base                    tgsi build context (not used here)
 * \param total_emitted_vertices_vec  per-lane total of emitted vertices
 * \param emitted_prims_vec           per-lane total of emitted primitives
 */
static void
draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
                      struct lp_build_tgsi_context * bld_base,
                      LLVMValueRef total_emitted_vertices_vec,
                      LLVMValueRef emitted_prims_vec)
{
   const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
   struct draw_gs_llvm_variant *variant = gs_iface->variant;
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef emitted_verts_ptr =
      draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
   LLVMValueRef emitted_prims_ptr =
      draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);

   /*
    * NOTE(review): both GEPs are built with an index count of 0, so the
    * &zero operand is presumably not consumed and the pointers are left
    * unchanged - confirm whether a count of 1 was intended.
    */
   emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
   emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");

   LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
   LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
}

static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
{
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMContextRef context = gallivm->context;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   LLVMTypeRef arg_types[11];
   unsigned num_arg_types = ARRAY_SIZE(arg_types);
   LLVMTypeRef func_type;
   LLVMValueRef context_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   char func_name[64];
   struct lp_type vs_type;
   LLVMValueRef count, fetch_elts, start_or_maxelt;
   LLVMValueRef vertex_id_offset, start_instance;
   LLVMValueRef stride, step, io_itr;
   LLVMValueRef ind_vec, start_vec, have_elts, fetch_max, tmp;
   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
   LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS];
   LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS];
   LLVMValueRef buffer_size_adj[PIPE_MAX_ATTRIBS];
   LLVMValueRef instance_index[PIPE_MAX_ATTRIBS];
   LLVMValueRef fake_buf_ptr, fake_buf;

   struct draw_context *draw = llvm->draw;
   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
   unsigned i, j;
   struct lp_build_context bld, blduivec;
   struct lp_build_loop_state lp_loop;
   struct lp_build_if_state if_ctx;
   const int vector_length = lp_native_vector_width / 32;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
   struct lp_build_sampler_soa *sampler = 0;
LLVMValueRef ret, clipmask_bool_ptr; 1551 struct draw_llvm_variant_key *key = &variant->key; 1552 /* If geometry shader is present we need to skip both the viewport 1553 * transformation and clipping otherwise the inputs to the geometry 1554 * shader will be incorrect. 1555 * The code can't handle vp transform when vs writes vp index neither 1556 * (though this would be fixable here, but couldn't just broadcast 1557 * the values). 1558 */ 1559 const boolean bypass_viewport = key->has_gs || key->bypass_viewport || 1560 vs_info->writes_viewport_index; 1561 const boolean enable_cliptest = !key->has_gs && (key->clip_xy || 1562 key->clip_z || 1563 key->clip_user || 1564 key->need_edgeflags); 1565 LLVMValueRef variant_func; 1566 const unsigned pos = draw->vs.position_output; 1567 const unsigned cv = draw->vs.clipvertex_output; 1568 boolean have_clipdist = FALSE; 1569 struct lp_bld_tgsi_system_values system_values; 1570 1571 memset(&system_values, 0, sizeof(system_values)); 1572 1573 util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u", 1574 variant->shader->variants_cached); 1575 1576 i = 0; 1577 arg_types[i++] = get_context_ptr_type(variant); /* context */ 1578 arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */ 1579 arg_types[i++] = get_buffer_ptr_type(variant); /* vbuffers */ 1580 arg_types[i++] = int32_type; /* count */ 1581 arg_types[i++] = int32_type; /* start/fetch_elt_max */ 1582 arg_types[i++] = int32_type; /* stride */ 1583 arg_types[i++] = get_vb_ptr_type(variant); /* pipe_vertex_buffer's */ 1584 arg_types[i++] = int32_type; /* instance_id */ 1585 arg_types[i++] = int32_type; /* vertex_id_offset */ 1586 arg_types[i++] = int32_type; /* start_instance */ 1587 arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */ 1588 1589 func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), 1590 arg_types, num_arg_types, 0); 1591 1592 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); 1593 
variant->function = variant_func; 1594 1595 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); 1596 for (i = 0; i < num_arg_types; ++i) 1597 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 1598 lp_add_function_attr(variant_func, i + 1, LP_FUNC_ATTR_NOALIAS); 1599 1600 context_ptr = LLVMGetParam(variant_func, 0); 1601 io_ptr = LLVMGetParam(variant_func, 1); 1602 vbuffers_ptr = LLVMGetParam(variant_func, 2); 1603 count = LLVMGetParam(variant_func, 3); 1604 /* 1605 * XXX: the maxelt part is unused. Not really useful, since we cannot 1606 * get index buffer overflows due to vsplit (which provides its own 1607 * elts buffer, with a different size than what's passed in here). 1608 */ 1609 start_or_maxelt = LLVMGetParam(variant_func, 4); 1610 /* 1611 * XXX: stride is actually unused. The stride we use is strictly calculated 1612 * from the number of outputs (including the draw_extra outputs). 1613 * Should probably fix some day (we need a new vs just because of extra 1614 * outputs which the generated vs won't touch). 
1615 */ 1616 stride = LLVMGetParam(variant_func, 5); 1617 vb_ptr = LLVMGetParam(variant_func, 6); 1618 system_values.instance_id = LLVMGetParam(variant_func, 7); 1619 vertex_id_offset = LLVMGetParam(variant_func, 8); 1620 start_instance = LLVMGetParam(variant_func, 9); 1621 fetch_elts = LLVMGetParam(variant_func, 10); 1622 1623 lp_build_name(context_ptr, "context"); 1624 lp_build_name(io_ptr, "io"); 1625 lp_build_name(vbuffers_ptr, "vbuffers"); 1626 lp_build_name(count, "count"); 1627 lp_build_name(start_or_maxelt, "start_or_maxelt"); 1628 lp_build_name(stride, "stride"); 1629 lp_build_name(vb_ptr, "vb"); 1630 lp_build_name(system_values.instance_id, "instance_id"); 1631 lp_build_name(vertex_id_offset, "vertex_id_offset"); 1632 lp_build_name(start_instance, "start_instance"); 1633 lp_build_name(fetch_elts, "fetch_elts"); 1634 1635 /* 1636 * Function body 1637 */ 1638 1639 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); 1640 builder = gallivm->builder; 1641 LLVMPositionBuilderAtEnd(builder, block); 1642 1643 memset(&vs_type, 0, sizeof vs_type); 1644 vs_type.floating = TRUE; /* floating point values */ 1645 vs_type.sign = TRUE; /* values are signed */ 1646 vs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 1647 vs_type.width = 32; /* 32-bit float */ 1648 vs_type.length = vector_length; 1649 1650 lp_build_context_init(&bld, gallivm, lp_type_uint(32)); 1651 lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); 1652 1653 /* hold temporary "bool" clipmask */ 1654 clipmask_bool_ptr = lp_build_alloca(gallivm, blduivec.vec_type, ""); 1655 1656 fake_buf = lp_build_alloca_undef(gallivm, 1657 LLVMVectorType(LLVMInt64TypeInContext(context), 4), ""); 1658 fake_buf = LLVMBuildBitCast(builder, fake_buf, 1659 LLVMPointerType(LLVMInt8TypeInContext(context), 0), ""); 1660 fake_buf_ptr = LLVMBuildGEP(builder, fake_buf, &bld.zero, 1, ""); 1661 1662 /* code generated texture sampling */ 1663 sampler = 
draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key)); 1664 1665 step = lp_build_const_int32(gallivm, vector_length); 1666 1667 ind_vec = blduivec.undef; 1668 for (i = 0; i < vs_type.length; i++) { 1669 LLVMValueRef index = lp_build_const_int32(gallivm, i); 1670 ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, ""); 1671 } 1672 1673 fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max"); 1674 1675 have_elts = LLVMBuildICmp(builder, LLVMIntNE, 1676 LLVMConstPointerNull(arg_types[10]), fetch_elts, ""); 1677 1678 fetch_max = LLVMBuildSub(builder, count, bld.one, "fetch_max"); 1679 fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max); 1680 /* 1681 * Only needed for non-indexed path. 1682 */ 1683 start_vec = lp_build_broadcast_scalar(&blduivec, start_or_maxelt); 1684 1685 /* 1686 * Pre-calculate everything which is constant per shader invocation. 1687 */ 1688 for (j = 0; j < key->nr_vertex_elements; ++j) { 1689 LLVMValueRef vb_buffer_offset, buffer_size, temp_ptr; 1690 LLVMValueRef vb_info, vbuffer_ptr, buf_offset, ofbit; 1691 struct pipe_vertex_element *velem = &key->vertex_element[j]; 1692 LLVMValueRef vb_index = 1693 lp_build_const_int32(gallivm, velem->vertex_buffer_index); 1694 LLVMValueRef bsize = lp_build_const_int32(gallivm, 1695 util_format_get_blocksize(velem->src_format)); 1696 LLVMValueRef src_offset = lp_build_const_int32(gallivm, 1697 velem->src_offset); 1698 struct lp_build_if_state if_ctx; 1699 1700 if (velem->src_format != PIPE_FORMAT_NONE) { 1701 vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &vb_index, 1, ""); 1702 vb_info = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); 1703 vb_stride[j] = draw_jit_vbuffer_stride(gallivm, vb_info); 1704 vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vb_info); 1705 map_ptr[j] = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr); 1706 buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr); 1707 1708 ofbit = NULL; 1709 /* 1710 * We'll set buffer_size_adj to zero if we have 
of, so it will 1711 * always overflow later automatically without having to keep ofbit. 1712 * Overflows (with normal wraparound) doing the actual offset 1713 * calculation should be ok, just not for the buffer size calc. 1714 * It would also be possible to detect such overflows and return 1715 * zeros if that happens, but this would be more complex. 1716 */ 1717 buf_offset = lp_build_add(&bld, vb_buffer_offset, src_offset); 1718 tmp = lp_build_sub(&bld, bsize, bld.one); 1719 buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size, tmp, 1720 &ofbit); 1721 buffer_size_adj[j] = lp_build_usub_overflow(gallivm, buffer_size_adj[j], 1722 buf_offset, &ofbit); 1723 1724 /* 1725 * We can't easily set fake vertex buffers outside the generated code. 1726 * Hence, set fake vertex buffers here instead basically, so fetch 1727 * code can always fetch using offset 0, eliminating all control flow 1728 * inside the main loop. 1729 * (Alternatively, could have control flow per vector skipping fetch 1730 * if ofbit is true.) 1731 */ 1732 if (velem->instance_divisor) { 1733 /* 1734 * Index is equal to the start instance plus the number of current 1735 * instance divided by the divisor. In this case we compute it as: 1736 * index = start_instance + (instance_id / divisor). 1737 * Note we could actually do the fetch here, outside the loop - 1738 * it's all constant, hopefully llvm recognizes this. 
1739 */ 1740 LLVMValueRef current_instance; 1741 current_instance = LLVMBuildUDiv(builder, system_values.instance_id, 1742 lp_build_const_int32(gallivm, 1743 velem->instance_divisor), 1744 "instance_divisor"); 1745 instance_index[j] = lp_build_uadd_overflow(gallivm, start_instance, 1746 current_instance, &ofbit); 1747 } 1748 1749 buffer_size_adj[j] = LLVMBuildSelect(builder, ofbit, bld.zero, 1750 buffer_size_adj[j], ""); 1751 1752 temp_ptr = lp_build_alloca_undef(gallivm, 1753 LLVMPointerType(LLVMInt8TypeInContext(context), 0), ""); 1754 1755 lp_build_if(&if_ctx, gallivm, ofbit); 1756 { 1757 LLVMBuildStore(builder, fake_buf_ptr, temp_ptr); 1758 } 1759 lp_build_else(&if_ctx); 1760 { 1761 map_ptr[j] = LLVMBuildGEP(builder, map_ptr[j], &buf_offset, 1, ""); 1762 LLVMBuildStore(builder, map_ptr[j], temp_ptr); 1763 } 1764 lp_build_endif(&if_ctx); 1765 map_ptr[j] = LLVMBuildLoad(builder, temp_ptr, "map_ptr"); 1766 1767 if (0) { 1768 lp_build_printf(gallivm, "velem %d, vbuf index = %u, vb_stride = %u\n", 1769 lp_build_const_int32(gallivm, j), 1770 vb_index, vb_stride[j]); 1771 lp_build_printf(gallivm, 1772 " vb_buffer_offset = %u, src_offset = %u, buf_offset = %u\n", 1773 vb_buffer_offset, src_offset, buf_offset); 1774 lp_build_printf(gallivm, " buffer size = %u, blocksize = %u\n", 1775 buffer_size, bsize); 1776 lp_build_printf(gallivm, " instance_id = %u\n", system_values.instance_id); 1777 } 1778 } 1779 } 1780 1781 lp_build_loop_begin(&lp_loop, gallivm, bld.zero); 1782 { 1783 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 1784 LLVMValueRef io; 1785 LLVMValueRef clipmask; /* holds the clipmask value */ 1786 LLVMValueRef true_index_array, index_store; 1787 const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS]; 1788 1789 io_itr = lp_loop.counter; 1790 1791 io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); 1792 #if DEBUG_STORE 1793 lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n", 1794 io_itr, io, lp_loop.counter); 1795 #endif 1796 1797 
true_index_array = lp_build_broadcast_scalar(&blduivec, lp_loop.counter); 1798 true_index_array = LLVMBuildAdd(builder, true_index_array, ind_vec, ""); 1799 1800 /* 1801 * Limit indices to fetch_max, otherwise might try to access indices 1802 * beyond index buffer (or rather vsplit elt buffer) size. 1803 * Could probably safely (?) skip this for non-indexed draws and 1804 * simplify things minimally (by removing it could combine the ind_vec 1805 * and start_vec adds). I think the only effect for non-indexed draws will 1806 * be that for the invalid elements they will be all fetched from the 1807 * same location as the last valid one, but noone should really care. 1808 */ 1809 true_index_array = lp_build_min(&blduivec, true_index_array, fetch_max); 1810 1811 index_store = lp_build_alloca_undef(gallivm, blduivec.vec_type, "index_store"); 1812 1813 lp_build_if(&if_ctx, gallivm, have_elts); 1814 { 1815 /* 1816 * Note: you'd expect some comparison/clamp against fetch_elt_max 1817 * here. 1818 * There used to be one here but it was incorrect: overflow was 1819 * detected if index > fetch_elt_max - but the correct condition 1820 * would be index >= fetch_elt_max (since this is just size of elts 1821 * buffer / element size). 1822 * Using the correct condition however will cause failures - due to 1823 * vsplit/vcache code which rebases indices. So, as an example, if 1824 * fetch_elt_max is just 1 and fetch_count 2, vsplit cache will 1825 * replace all invalid indices with 0 - which in case of elt_bias 1826 * not being zero will get a different fetch index than the valid 1827 * index 0. So, just rely on vsplit code preventing out-of-bounds 1828 * fetches. This is also why it's safe to do elts fetch even if there 1829 * was no index buffer bound - the real buffer is never seen here, at 1830 * least not if there are index buffer overflows... 1831 */ 1832 1833 /* 1834 * XXX should not have to do this, as scale can be handled 1835 * natively by loads (hits asserts though). 
1836 */ 1837 tmp = lp_build_shl_imm(&blduivec, true_index_array, 2); 1838 fetch_elts = LLVMBuildBitCast(builder, fetch_elts, 1839 LLVMPointerType(LLVMInt8TypeInContext(context), 1840 0), ""); 1841 tmp = lp_build_gather(gallivm, vs_type.length, 1842 32, bld.type, TRUE, 1843 fetch_elts, tmp, FALSE); 1844 LLVMBuildStore(builder, tmp, index_store); 1845 } 1846 lp_build_else(&if_ctx); 1847 { 1848 tmp = LLVMBuildAdd(builder, true_index_array, start_vec, ""); 1849 LLVMBuildStore(builder, tmp, index_store); 1850 } 1851 lp_build_endif(&if_ctx); 1852 1853 true_index_array = LLVMBuildLoad(builder, index_store, ""); 1854 1855 for (j = 0; j < key->nr_vertex_elements; ++j) { 1856 struct pipe_vertex_element *velem = &key->vertex_element[j]; 1857 const struct util_format_description *format_desc = 1858 util_format_description(velem->src_format); 1859 1860 if (format_desc->format == PIPE_FORMAT_NONE) { 1861 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 1862 inputs[j][i] = lp_build_zero(gallivm, vs_type); 1863 } 1864 } 1865 else if (velem->instance_divisor) { 1866 fetch_instanced(gallivm, format_desc, vs_type, 1867 vb_stride[j], map_ptr[j], 1868 buffer_size_adj[j], 1869 inputs[j], instance_index[j]); 1870 } 1871 else { 1872 fetch_vector(gallivm, format_desc, vs_type, 1873 vb_stride[j], map_ptr[j], 1874 buffer_size_adj[j], 1875 inputs[j], true_index_array); 1876 } 1877 } 1878 1879 /* In the paths with elts vertex id has to be unaffected by the 1880 * index bias and because indices inside our elements array have 1881 * already had index bias applied we need to subtract it here to 1882 * get back to the original index. 
1883 * in the linear paths vertex id has to be unaffected by the 1884 * original start index and because we abuse the 'start' variable 1885 * to either represent the actual start index or the index at which 1886 * the primitive was split (we split rendering into chunks of at 1887 * most 4095-vertices) we need to back out the original start 1888 * index out of our vertex id here. 1889 */ 1890 system_values.basevertex = lp_build_broadcast_scalar(&blduivec, 1891 vertex_id_offset); 1892 system_values.vertex_id = true_index_array; 1893 system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array, 1894 system_values.basevertex, ""); 1895 1896 ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs; 1897 generate_vs(variant, 1898 builder, 1899 vs_type, 1900 outputs, 1901 ptr_aos, 1902 &system_values, 1903 context_ptr, 1904 sampler, 1905 key->clamp_vertex_color); 1906 1907 if (pos != -1 && cv != -1) { 1908 /* store original positions in clip before further manipulation */ 1909 store_clip(gallivm, vs_type, io, outputs, pos); 1910 1911 /* do cliptest */ 1912 if (enable_cliptest) { 1913 LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, ""); 1914 /* allocate clipmask, assign it integer type */ 1915 clipmask = generate_clipmask(llvm, 1916 gallivm, 1917 vs_type, 1918 outputs, 1919 key, 1920 context_ptr, &have_clipdist); 1921 temp = LLVMBuildOr(builder, clipmask, temp, ""); 1922 /* store temporary clipping boolean value */ 1923 LLVMBuildStore(builder, temp, clipmask_bool_ptr); 1924 } 1925 else { 1926 clipmask = blduivec.zero; 1927 } 1928 1929 /* do viewport mapping */ 1930 if (!bypass_viewport) { 1931 generate_viewport(variant, builder, vs_type, outputs, context_ptr); 1932 } 1933 } 1934 else { 1935 clipmask = blduivec.zero; 1936 } 1937 1938 /* store clipmask in vertex header, 1939 * original positions in clip 1940 * and transformed positions in data 1941 */ 1942 convert_to_aos(gallivm, io, NULL, outputs, clipmask, 1943 vs_info->num_outputs, vs_type, 
1944 enable_cliptest && key->need_edgeflags); 1945 } 1946 lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE); 1947 1948 sampler->destroy(sampler); 1949 1950 /* return clipping boolean value for function */ 1951 ret = clipmask_booli8(gallivm, vs_type, clipmask_bool_ptr, 1952 enable_cliptest && key->need_edgeflags); 1953 1954 LLVMBuildRet(builder, ret); 1955 1956 gallivm_verify_function(gallivm, variant_func); 1957 } 1958 1959 1960 struct draw_llvm_variant_key * 1961 draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) 1962 { 1963 unsigned i; 1964 struct draw_llvm_variant_key *key; 1965 struct draw_sampler_static_state *draw_sampler; 1966 1967 key = (struct draw_llvm_variant_key *)store; 1968 1969 memset(key, 0, offsetof(struct draw_llvm_variant_key, vertex_element[0])); 1970 1971 key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/ 1972 1973 /* will have to rig this up properly later */ 1974 key->clip_xy = llvm->draw->clip_xy; 1975 key->clip_z = llvm->draw->clip_z; 1976 key->clip_user = llvm->draw->clip_user; 1977 key->bypass_viewport = llvm->draw->bypass_viewport; 1978 key->clip_halfz = llvm->draw->rasterizer->clip_halfz; 1979 /* XXX assumes edgeflag output not at 0 */ 1980 key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); 1981 key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; 1982 key->has_gs = llvm->draw->gs.geometry_shader != NULL; 1983 key->num_outputs = draw_total_vs_outputs(llvm->draw); 1984 1985 /* All variants of this shader will have the same value for 1986 * nr_samplers. Not yet trying to compact away holes in the 1987 * sampler array. 
1988 */ 1989 key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; 1990 if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 1991 key->nr_sampler_views = 1992 llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 1993 } 1994 else { 1995 key->nr_sampler_views = key->nr_samplers; 1996 } 1997 1998 /* Presumably all variants of the shader should have the same 1999 * number of vertex elements - ie the number of shader inputs. 2000 * NOTE: we NEED to store the needed number of needed inputs 2001 * here, not the number of provided elements to match keysize 2002 * (and the offset of sampler state in the key). 2003 * If we have excess number of vertex elements, this is valid, 2004 * but the excess ones don't matter. 2005 * If we don't have enough vertex elements (which looks not really 2006 * valid but we'll handle it gracefully) fill out missing ones with 2007 * zero (we'll recognize these later by PIPE_FORMAT_NONE). 
2008 */ 2009 key->nr_vertex_elements = 2010 llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1; 2011 2012 if (llvm->draw->pt.nr_vertex_elements < key->nr_vertex_elements) { 2013 debug_printf("draw: vs with %d inputs but only have %d vertex elements\n", 2014 key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements); 2015 memset(key->vertex_element, 0, 2016 sizeof(struct pipe_vertex_element) * key->nr_vertex_elements); 2017 } 2018 memcpy(key->vertex_element, 2019 llvm->draw->pt.vertex_element, 2020 sizeof(struct pipe_vertex_element) * 2021 MIN2(key->nr_vertex_elements, llvm->draw->pt.nr_vertex_elements)); 2022 2023 draw_sampler = draw_llvm_variant_key_samplers(key); 2024 memset(draw_sampler, 0, 2025 MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); 2026 2027 for (i = 0 ; i < key->nr_samplers; i++) { 2028 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, 2029 llvm->draw->samplers[PIPE_SHADER_VERTEX][i]); 2030 } 2031 for (i = 0 ; i < key->nr_sampler_views; i++) { 2032 lp_sampler_static_texture_state(&draw_sampler[i].texture_state, 2033 llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]); 2034 } 2035 2036 return key; 2037 } 2038 2039 2040 void 2041 draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) 2042 { 2043 unsigned i; 2044 struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key); 2045 2046 debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color); 2047 debug_printf("clip_xy = %u\n", key->clip_xy); 2048 debug_printf("clip_z = %u\n", key->clip_z); 2049 debug_printf("clip_user = %u\n", key->clip_user); 2050 debug_printf("bypass_viewport = %u\n", key->bypass_viewport); 2051 debug_printf("clip_halfz = %u\n", key->clip_halfz); 2052 debug_printf("need_edgeflags = %u\n", key->need_edgeflags); 2053 debug_printf("has_gs = %u\n", key->has_gs); 2054 debug_printf("ucp_enable = %u\n", key->ucp_enable); 2055 2056 for (i = 0 ; i < key->nr_vertex_elements; i++) { 2057 
debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset); 2058 debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor); 2059 debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index); 2060 debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format)); 2061 } 2062 2063 for (i = 0 ; i < key->nr_sampler_views; i++) { 2064 debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format)); 2065 } 2066 } 2067 2068 2069 void 2070 draw_llvm_set_mapped_texture(struct draw_context *draw, 2071 unsigned shader_stage, 2072 unsigned sview_idx, 2073 uint32_t width, uint32_t height, uint32_t depth, 2074 uint32_t first_level, uint32_t last_level, 2075 const void *base_ptr, 2076 uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], 2077 uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], 2078 uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]) 2079 { 2080 unsigned j; 2081 struct draw_jit_texture *jit_tex; 2082 2083 assert(shader_stage == PIPE_SHADER_VERTEX || 2084 shader_stage == PIPE_SHADER_GEOMETRY); 2085 2086 if (shader_stage == PIPE_SHADER_VERTEX) { 2087 assert(sview_idx < ARRAY_SIZE(draw->llvm->jit_context.textures)); 2088 2089 jit_tex = &draw->llvm->jit_context.textures[sview_idx]; 2090 } else if (shader_stage == PIPE_SHADER_GEOMETRY) { 2091 assert(sview_idx < ARRAY_SIZE(draw->llvm->gs_jit_context.textures)); 2092 2093 jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx]; 2094 } else { 2095 assert(0); 2096 return; 2097 } 2098 2099 jit_tex->width = width; 2100 jit_tex->height = height; 2101 jit_tex->depth = depth; 2102 jit_tex->first_level = first_level; 2103 jit_tex->last_level = last_level; 2104 jit_tex->base = base_ptr; 2105 2106 for (j = first_level; j <= last_level; j++) { 2107 jit_tex->mip_offsets[j] = mip_offsets[j]; 2108 jit_tex->row_stride[j] = row_stride[j]; 2109 
jit_tex->img_stride[j] = img_stride[j]; 2110 } 2111 } 2112 2113 2114 void 2115 draw_llvm_set_sampler_state(struct draw_context *draw, 2116 unsigned shader_type) 2117 { 2118 unsigned i; 2119 2120 if (shader_type == PIPE_SHADER_VERTEX) { 2121 for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) { 2122 struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i]; 2123 2124 if (draw->samplers[PIPE_SHADER_VERTEX][i]) { 2125 const struct pipe_sampler_state *s 2126 = draw->samplers[PIPE_SHADER_VERTEX][i]; 2127 jit_sam->min_lod = s->min_lod; 2128 jit_sam->max_lod = s->max_lod; 2129 jit_sam->lod_bias = s->lod_bias; 2130 COPY_4V(jit_sam->border_color, s->border_color.f); 2131 } 2132 } 2133 } else if (shader_type == PIPE_SHADER_GEOMETRY) { 2134 for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) { 2135 struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i]; 2136 2137 if (draw->samplers[PIPE_SHADER_GEOMETRY][i]) { 2138 const struct pipe_sampler_state *s 2139 = draw->samplers[PIPE_SHADER_GEOMETRY][i]; 2140 jit_sam->min_lod = s->min_lod; 2141 jit_sam->max_lod = s->max_lod; 2142 jit_sam->lod_bias = s->lod_bias; 2143 COPY_4V(jit_sam->border_color, s->border_color.f); 2144 } 2145 } 2146 } 2147 } 2148 2149 2150 void 2151 draw_llvm_destroy_variant(struct draw_llvm_variant *variant) 2152 { 2153 struct draw_llvm *llvm = variant->llvm; 2154 2155 gallivm_destroy(variant->gallivm); 2156 2157 remove_from_list(&variant->list_item_local); 2158 variant->shader->variants_cached--; 2159 remove_from_list(&variant->list_item_global); 2160 llvm->nr_variants--; 2161 FREE(variant); 2162 } 2163 2164 2165 /** 2166 * Create LLVM types for various structures. 
2167 */ 2168 static void 2169 create_gs_jit_types(struct draw_gs_llvm_variant *var) 2170 { 2171 struct gallivm_state *gallivm = var->gallivm; 2172 LLVMTypeRef texture_type, sampler_type, context_type; 2173 2174 texture_type = create_jit_texture_type(gallivm, "texture"); 2175 sampler_type = create_jit_sampler_type(gallivm, "sampler"); 2176 2177 context_type = create_gs_jit_context_type(gallivm, 2178 var->shader->base.vector_length, 2179 texture_type, sampler_type, 2180 "draw_gs_jit_context"); 2181 var->context_ptr_type = LLVMPointerType(context_type, 0); 2182 2183 var->input_array_type = create_gs_jit_input_type(gallivm); 2184 } 2185 2186 static LLVMTypeRef 2187 get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant) 2188 { 2189 if (!variant->context_ptr_type) 2190 create_gs_jit_types(variant); 2191 return variant->context_ptr_type; 2192 } 2193 2194 static LLVMValueRef 2195 generate_mask_value(struct draw_gs_llvm_variant *variant, 2196 struct lp_type gs_type) 2197 { 2198 struct gallivm_state *gallivm = variant->gallivm; 2199 LLVMBuilderRef builder = gallivm->builder; 2200 struct lp_type mask_type = lp_int_type(gs_type); 2201 LLVMValueRef num_prims; 2202 LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0); 2203 unsigned i; 2204 2205 num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type), 2206 variant->num_prims); 2207 for (i = 0; i < gs_type.length; i++) { 2208 LLVMValueRef idx = lp_build_const_int32(gallivm, i); 2209 mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, ""); 2210 } 2211 mask_val = lp_build_compare(gallivm, mask_type, 2212 PIPE_FUNC_GREATER, num_prims, mask_val); 2213 2214 return mask_val; 2215 } 2216 2217 static void 2218 draw_gs_llvm_generate(struct draw_llvm *llvm, 2219 struct draw_gs_llvm_variant *variant) 2220 { 2221 struct gallivm_state *gallivm = variant->gallivm; 2222 LLVMContextRef context = gallivm->context; 2223 LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); 2224 LLVMTypeRef 
arg_types[7]; 2225 LLVMTypeRef func_type; 2226 LLVMValueRef variant_func; 2227 LLVMValueRef context_ptr; 2228 LLVMValueRef prim_id_ptr; 2229 LLVMBasicBlockRef block; 2230 LLVMBuilderRef builder; 2231 LLVMValueRef io_ptr, input_array, num_prims, mask_val; 2232 struct lp_build_sampler_soa *sampler = 0; 2233 struct lp_build_context bld; 2234 struct lp_bld_tgsi_system_values system_values; 2235 char func_name[64]; 2236 struct lp_type gs_type; 2237 unsigned i; 2238 struct draw_gs_llvm_iface gs_iface; 2239 const struct tgsi_token *tokens = variant->shader->base.state.tokens; 2240 LLVMValueRef consts_ptr, num_consts_ptr; 2241 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 2242 struct lp_build_mask_context mask; 2243 const struct tgsi_shader_info *gs_info = &variant->shader->base.info; 2244 unsigned vector_length = variant->shader->base.vector_length; 2245 2246 memset(&system_values, 0, sizeof(system_values)); 2247 2248 util_snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant%u", 2249 variant->shader->variants_cached); 2250 2251 assert(variant->vertex_header_ptr_type); 2252 2253 arg_types[0] = get_gs_context_ptr_type(variant); /* context */ 2254 arg_types[1] = variant->input_array_type; /* input */ 2255 arg_types[2] = variant->vertex_header_ptr_type; /* vertex_header */ 2256 arg_types[3] = int32_type; /* num_prims */ 2257 arg_types[4] = int32_type; /* instance_id */ 2258 arg_types[5] = LLVMPointerType( 2259 LLVMVectorType(int32_type, vector_length), 0); /* prim_id_ptr */ 2260 arg_types[6] = int32_type; 2261 2262 func_type = LLVMFunctionType(int32_type, arg_types, ARRAY_SIZE(arg_types), 0); 2263 2264 variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); 2265 2266 variant->function = variant_func; 2267 2268 LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); 2269 2270 for (i = 0; i < ARRAY_SIZE(arg_types); ++i) 2271 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 2272 lp_add_function_attr(variant_func, i + 1, 
LP_FUNC_ATTR_NOALIAS); 2273 2274 context_ptr = LLVMGetParam(variant_func, 0); 2275 input_array = LLVMGetParam(variant_func, 1); 2276 io_ptr = LLVMGetParam(variant_func, 2); 2277 num_prims = LLVMGetParam(variant_func, 3); 2278 system_values.instance_id = LLVMGetParam(variant_func, 4); 2279 prim_id_ptr = LLVMGetParam(variant_func, 5); 2280 system_values.invocation_id = LLVMGetParam(variant_func, 6); 2281 2282 lp_build_name(context_ptr, "context"); 2283 lp_build_name(input_array, "input"); 2284 lp_build_name(io_ptr, "io"); 2285 lp_build_name(num_prims, "num_prims"); 2286 lp_build_name(system_values.instance_id, "instance_id"); 2287 lp_build_name(prim_id_ptr, "prim_id_ptr"); 2288 lp_build_name(system_values.invocation_id, "invocation_id"); 2289 2290 variant->context_ptr = context_ptr; 2291 variant->io_ptr = io_ptr; 2292 variant->num_prims = num_prims; 2293 2294 gs_iface.base.fetch_input = draw_gs_llvm_fetch_input; 2295 gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex; 2296 gs_iface.base.end_primitive = draw_gs_llvm_end_primitive; 2297 gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue; 2298 gs_iface.input = input_array; 2299 gs_iface.variant = variant; 2300 2301 /* 2302 * Function body 2303 */ 2304 2305 block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); 2306 builder = gallivm->builder; 2307 LLVMPositionBuilderAtEnd(builder, block); 2308 2309 lp_build_context_init(&bld, gallivm, lp_type_int(32)); 2310 2311 memset(&gs_type, 0, sizeof gs_type); 2312 gs_type.floating = TRUE; /* floating point values */ 2313 gs_type.sign = TRUE; /* values are signed */ 2314 gs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ 2315 gs_type.width = 32; /* 32-bit float */ 2316 gs_type.length = vector_length; 2317 2318 consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr); 2319 num_consts_ptr = 2320 draw_gs_jit_context_num_constants(variant->gallivm, context_ptr); 2321 2322 /* code generated texture sampling */ 2323 sampler = 
draw_llvm_sampler_soa_create(variant->key.samplers); 2324 2325 mask_val = generate_mask_value(variant, gs_type); 2326 lp_build_mask_begin(&mask, gallivm, gs_type, mask_val); 2327 2328 if (gs_info->uses_primid) { 2329 system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id"); 2330 } 2331 2332 if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) { 2333 tgsi_dump(tokens, 0); 2334 draw_gs_llvm_dump_variant_key(&variant->key); 2335 } 2336 2337 lp_build_tgsi_soa(variant->gallivm, 2338 tokens, 2339 gs_type, 2340 &mask, 2341 consts_ptr, 2342 num_consts_ptr, 2343 &system_values, 2344 NULL, 2345 outputs, 2346 context_ptr, 2347 NULL, 2348 sampler, 2349 &llvm->draw->gs.geometry_shader->info, 2350 (const struct lp_build_tgsi_gs_iface *)&gs_iface); 2351 2352 sampler->destroy(sampler); 2353 2354 lp_build_mask_end(&mask); 2355 2356 LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32))); 2357 2358 gallivm_verify_function(gallivm, variant_func); 2359 } 2360 2361 2362 struct draw_gs_llvm_variant * 2363 draw_gs_llvm_create_variant(struct draw_llvm *llvm, 2364 unsigned num_outputs, 2365 const struct draw_gs_llvm_variant_key *key) 2366 { 2367 struct draw_gs_llvm_variant *variant; 2368 struct llvm_geometry_shader *shader = 2369 llvm_geometry_shader(llvm->draw->gs.geometry_shader); 2370 LLVMTypeRef vertex_header; 2371 char module_name[64]; 2372 2373 variant = MALLOC(sizeof *variant + 2374 shader->variant_key_size - 2375 sizeof variant->key); 2376 if (!variant) 2377 return NULL; 2378 2379 variant->llvm = llvm; 2380 variant->shader = shader; 2381 2382 util_snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u", 2383 variant->shader->variants_cached); 2384 2385 variant->gallivm = gallivm_create(module_name, llvm->context); 2386 2387 create_gs_jit_types(variant); 2388 2389 memcpy(&variant->key, key, shader->variant_key_size); 2390 2391 vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs); 2392 2393 variant->vertex_header_ptr_type = 
LLVMPointerType(vertex_header, 0); 2394 2395 draw_gs_llvm_generate(llvm, variant); 2396 2397 gallivm_compile_module(variant->gallivm); 2398 2399 variant->jit_func = (draw_gs_jit_func) 2400 gallivm_jit_function(variant->gallivm, variant->function); 2401 2402 gallivm_free_ir(variant->gallivm); 2403 2404 variant->list_item_global.base = variant; 2405 variant->list_item_local.base = variant; 2406 /*variant->no = */shader->variants_created++; 2407 variant->list_item_global.base = variant; 2408 2409 return variant; 2410 } 2411 2412 void 2413 draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant) 2414 { 2415 struct draw_llvm *llvm = variant->llvm; 2416 2417 gallivm_destroy(variant->gallivm); 2418 2419 remove_from_list(&variant->list_item_local); 2420 variant->shader->variants_cached--; 2421 remove_from_list(&variant->list_item_global); 2422 llvm->nr_gs_variants--; 2423 FREE(variant); 2424 } 2425 2426 struct draw_gs_llvm_variant_key * 2427 draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store) 2428 { 2429 unsigned i; 2430 struct draw_gs_llvm_variant_key *key; 2431 struct draw_sampler_static_state *draw_sampler; 2432 2433 key = (struct draw_gs_llvm_variant_key *)store; 2434 2435 memset(key, 0, offsetof(struct draw_gs_llvm_variant_key, samplers[0])); 2436 2437 key->num_outputs = draw_total_gs_outputs(llvm->draw); 2438 2439 /* All variants of this shader will have the same value for 2440 * nr_samplers. Not yet trying to compact away holes in the 2441 * sampler array. 
2442 */ 2443 key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; 2444 if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 2445 key->nr_sampler_views = 2446 llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 2447 } 2448 else { 2449 key->nr_sampler_views = key->nr_samplers; 2450 } 2451 2452 draw_sampler = key->samplers; 2453 2454 memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); 2455 2456 for (i = 0 ; i < key->nr_samplers; i++) { 2457 lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, 2458 llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]); 2459 } 2460 for (i = 0 ; i < key->nr_sampler_views; i++) { 2461 lp_sampler_static_texture_state(&draw_sampler[i].texture_state, 2462 llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]); 2463 } 2464 2465 return key; 2466 } 2467 2468 void 2469 draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key) 2470 { 2471 unsigned i; 2472 struct draw_sampler_static_state *sampler = key->samplers; 2473 2474 for (i = 0 ; i < key->nr_sampler_views; i++) { 2475 debug_printf("sampler[%i].src_format = %s\n", i, 2476 util_format_name(sampler[i].texture_state.format)); 2477 } 2478 } 2479