1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2013 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "tgsi/tgsi_dump.h" 29 #include "tgsi/tgsi_util.h" 30 #include "toy_compiler.h" 31 #include "toy_tgsi.h" 32 #include "toy_legalize.h" 33 #include "toy_optimize.h" 34 #include "toy_helpers.h" 35 #include "ilo_shader_internal.h" 36 37 struct vs_compile_context { 38 struct ilo_shader *shader; 39 const struct ilo_shader_variant *variant; 40 41 struct toy_compiler tc; 42 struct toy_tgsi tgsi; 43 int const_cache; 44 45 int output_map[PIPE_MAX_SHADER_OUTPUTS]; 46 47 int num_grf_per_vrf; 48 int first_const_grf; 49 int first_ucp_grf; 50 int first_vue_grf; 51 int first_free_grf; 52 int last_free_grf; 53 54 int first_free_mrf; 55 int last_free_mrf; 56 }; 57 58 static void 59 vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc, 60 struct toy_dst dst, int dim, int idx) 61 { 62 struct toy_compiler *tc = &vcc->tc; 63 int slot; 64 65 assert(!dim); 66 67 slot = toy_tgsi_find_input(&vcc->tgsi, idx); 68 if (slot >= 0) { 69 const int first_in_grf = vcc->first_vue_grf + 70 (vcc->shader->in.count - vcc->tgsi.num_inputs); 71 const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index; 72 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); 73 74 tc_MOV(tc, dst, src); 75 } 76 else { 77 /* undeclared input */ 78 tc_MOV(tc, dst, tsrc_imm_f(0.0f)); 79 } 80 } 81 82 static bool 83 vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc, 84 struct toy_dst dst, int dim, 85 struct toy_src idx) 86 { 87 const int i = idx.val32; 88 const int grf = vcc->first_const_grf + i / 2; 89 const int grf_subreg = (i & 1) * 16; 90 struct toy_src src; 91 92 if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || 93 grf >= vcc->first_ucp_grf) 94 return false; 95 96 97 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041); 98 tc_MOV(&vcc->tc, dst, src); 99 100 return true; 101 } 102 103 static void 104 vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc, 105 struct toy_dst dst, int dim, 106 struct toy_src idx) 107 { 108 const struct toy_dst header = 109 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); 110 const struct toy_dst block_offsets = 111 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0)); 112 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); 113 struct toy_compiler *tc = &vcc->tc; 114 unsigned msg_type, msg_ctrl, msg_len; 115 struct toy_inst *inst; 116 struct toy_src desc; 117 118 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) 119 return; 120 121 /* set message header */ 122 inst = tc_MOV(tc, header, r0); 123 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 124 125 /* set block offsets */ 126 tc_MOV(tc, block_offsets, idx); 127 128 msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ; 129 msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1; 130 msg_len = 2; 131 132 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false, 133 msg_type, msg_ctrl, vcc->shader->bt.const_base + dim); 134 135 tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache); 136 } 137 138 static void 139 vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc, 140 struct toy_dst dst, int dim, 141 struct toy_src idx) 142 { 143 struct toy_compiler *tc = &vcc->tc; 144 const struct toy_dst offset = 145 tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); 146 struct toy_src desc; 147 148 if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) 149 return; 150 151 /* 152 * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was 153 * changed from OWord Dual Block Read to ld to increase performance in the 154 * classic driver. Since we use the constant cache instead of the data 155 * cache, I wonder if we still want to follow the classic driver. 156 */ 157 158 /* set offset */ 159 tc_MOV(tc, offset, idx); 160 161 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false, 162 GEN6_MSG_SAMPLER_SIMD4X2, 163 GEN6_MSG_SAMPLER_LD, 164 0, 165 vcc->shader->bt.const_base + dim); 166 167 tc_SEND(tc, dst, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); 168 } 169 170 static void 171 vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc, 172 struct toy_dst dst, int idx) 173 { 174 const uint32_t *imm; 175 int ch; 176 177 imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL); 178 179 for (ch = 0; ch < 4; ch++) { 180 /* raw moves */ 181 tc_MOV(&vcc->tc, 182 tdst_writemask(tdst_ud(dst), 1 << ch), 183 tsrc_imm_ud(imm[ch])); 184 } 185 } 186 187 188 static void 189 vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc, 190 struct toy_dst dst, int dim, int idx) 191 { 192 struct toy_compiler *tc = &vcc->tc; 193 const struct toy_tgsi *tgsi = &vcc->tgsi; 194 int slot; 195 196 assert(!dim); 197 198 slot = toy_tgsi_find_system_value(tgsi, idx); 199 if (slot < 0) 200 return; 201 202 switch (tgsi->system_values[slot].semantic_name) { 203 case TGSI_SEMANTIC_INSTANCEID: 204 case TGSI_SEMANTIC_VERTEXID: 205 /* 206 * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for 207 * the generated IDs, with VID in the X channel and IID in the Y 208 * channel. 209 */ 210 { 211 const int grf = vcc->first_vue_grf; 212 const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); 213 const enum toy_swizzle swizzle = 214 (tgsi->system_values[slot].semantic_name == 215 TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X; 216 217 tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle))); 218 } 219 break; 220 case TGSI_SEMANTIC_PRIMID: 221 default: 222 tc_fail(tc, "unhandled system value"); 223 tc_MOV(tc, dst, tsrc_imm_d(0)); 224 break; 225 } 226 } 227 228 static void 229 vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc, 230 struct toy_inst *inst) 231 { 232 struct toy_compiler *tc = &vcc->tc; 233 int dim, idx; 234 235 assert(inst->src[0].file == TOY_FILE_IMM); 236 dim = inst->src[0].val32; 237 238 assert(inst->src[1].file == TOY_FILE_IMM); 239 idx = inst->src[1].val32; 240 241 switch (inst->opcode) { 242 case TOY_OPCODE_TGSI_IN: 243 vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx); 244 break; 245 case TOY_OPCODE_TGSI_CONST: 246 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) 247 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]); 248 else 249 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]); 250 break; 251 case TOY_OPCODE_TGSI_SV: 252 vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx); 253 break; 254 case TOY_OPCODE_TGSI_IMM: 255 assert(!dim); 256 vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx); 257 break; 258 default: 259 tc_fail(tc, "unhandled TGSI fetch"); 260 break; 261 } 262 263 tc_discard_inst(tc, inst); 264 } 265 266 static void 267 vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc, 268 struct toy_inst *inst) 269 { 270 struct toy_compiler *tc = &vcc->tc; 271 enum tgsi_file_type file; 272 int dim, idx; 273 struct toy_src indirect_dim, indirect_idx; 274 275 assert(inst->src[0].file == TOY_FILE_IMM); 276 file = inst->src[0].val32; 277 278 assert(inst->src[1].file == TOY_FILE_IMM); 279 dim = inst->src[1].val32; 280 indirect_dim = inst->src[2]; 281 282 assert(inst->src[3].file == TOY_FILE_IMM); 283 idx = inst->src[3].val32; 284 indirect_idx = inst->src[4]; 285 286 /* no dimension indirection */ 287 assert(indirect_dim.file == TOY_FILE_IMM); 288 dim += indirect_dim.val32; 289 290 switch (inst->opcode) { 291 case TOY_OPCODE_TGSI_INDIRECT_FETCH: 292 if (file == TGSI_FILE_CONSTANT) { 293 if (idx) { 294 struct toy_dst tmp = tc_alloc_tmp(tc); 295 296 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx)); 297 indirect_idx = tsrc_from(tmp); 298 } 299 300 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) 301 vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx); 302 else 303 vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx); 304 break; 305 } 306 /* fall through */ 307 case TOY_OPCODE_TGSI_INDIRECT_STORE: 308 default: 309 tc_fail(tc, "unhandled TGSI indirection"); 310 break; 311 } 312 313 tc_discard_inst(tc, inst); 314 } 315 316 /** 317 * Emit instructions to move sampling parameters to the message registers. 318 */ 319 static int 320 vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf, 321 struct toy_src coords, int num_coords, 322 struct toy_src bias_or_lod, struct toy_src ref_or_si, 323 struct toy_src ddx, struct toy_src ddy, int num_derivs) 324 { 325 const unsigned coords_writemask = (1 << num_coords) - 1; 326 struct toy_dst m[3]; 327 int num_params, i; 328 329 assert(num_coords <= 4); 330 assert(num_derivs <= 3 && num_derivs <= num_coords); 331 332 for (i = 0; i < ARRAY_SIZE(m); i++) 333 m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0); 334 335 switch (msg_type) { 336 case GEN6_MSG_SAMPLER_SAMPLE_L: 337 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); 338 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod); 339 num_params = 5; 340 break; 341 case GEN6_MSG_SAMPLER_SAMPLE_D: 342 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); 343 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ), 344 tsrc_swizzle(ddx, 0, 0, 1, 1)); 345 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW), 346 tsrc_swizzle(ddy, 0, 0, 1, 1)); 347 if (num_derivs > 2) { 348 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X), 349 tsrc_swizzle1(ddx, 2)); 350 tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y), 351 tsrc_swizzle1(ddy, 2)); 352 } 353 num_params = 4 + num_derivs * 2; 354 break; 355 case GEN6_MSG_SAMPLER_SAMPLE_L_C: 356 tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); 357 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si); 358 tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod); 359 num_params = 6; 360 break; 361 case GEN6_MSG_SAMPLER_LD: 362 assert(num_coords <= 3); 363 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords); 364 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod); 365 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 366 num_params = 4; 367 } 368 else { 369 tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si); 370 num_params = 5; 371 } 372 break; 373 case GEN6_MSG_SAMPLER_RESINFO: 374 tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod); 375 num_params = 1; 376 break; 377 default: 378 tc_fail(tc, "unknown sampler opcode"); 379 num_params = 0; 380 break; 381 } 382 383 return (num_params + 3) / 4; 384 } 385 386 /** 387 * Set up message registers and return the message descriptor for sampling. 388 */ 389 static struct toy_src 390 vs_prepare_tgsi_sampling(struct vs_compile_context *vcc, 391 const struct toy_inst *inst, 392 int base_mrf, unsigned *ret_sampler_index) 393 { 394 struct toy_compiler *tc = &vcc->tc; 395 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; 396 struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si; 397 int num_coords, ref_pos, num_derivs; 398 int sampler_src; 399 400 simd_mode = GEN6_MSG_SAMPLER_SIMD4X2; 401 402 coords = inst->src[0]; 403 ddx = tsrc_null(); 404 ddy = tsrc_null(); 405 bias_or_lod = tsrc_null(); 406 ref_or_si = tsrc_null(); 407 num_derivs = 0; 408 sampler_src = 1; 409 410 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target); 411 ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target); 412 413 /* extract the parameters */ 414 switch (inst->opcode) { 415 case TOY_OPCODE_TGSI_TXD: 416 if (ref_pos >= 0) { 417 assert(ref_pos < 4); 418 419 msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C; 420 ref_or_si = tsrc_swizzle1(coords, ref_pos); 421 422 if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5)) 423 tc_fail(tc, "TXD with shadow sampler not supported"); 424 } 425 else { 426 msg_type = GEN6_MSG_SAMPLER_SAMPLE_D; 427 } 428 429 ddx = inst->src[1]; 430 ddy = inst->src[2]; 431 num_derivs = num_coords; 432 sampler_src = 3; 433 break; 434 case TOY_OPCODE_TGSI_TXL: 435 if (ref_pos >= 0) { 436 assert(ref_pos < 3); 437 438 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; 439 ref_or_si = tsrc_swizzle1(coords, ref_pos); 440 } 441 else { 442 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; 443 } 444 445 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); 446 break; 447 case TOY_OPCODE_TGSI_TXF: 448 msg_type = GEN6_MSG_SAMPLER_LD; 449 450 switch (inst->tex.target) { 451 case TGSI_TEXTURE_2D_MSAA: 452 case TGSI_TEXTURE_2D_ARRAY_MSAA: 453 assert(ref_pos >= 0 && ref_pos < 4); 454 /* lod is always 0 */ 455 bias_or_lod = tsrc_imm_d(0); 456 ref_or_si = tsrc_swizzle1(coords, ref_pos); 457 break; 458 default: 459 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); 460 break; 461 } 462 463 /* offset the coordinates */ 464 if (!tsrc_is_null(inst->tex.offsets[0])) { 465 struct toy_dst tmp; 466 467 tmp = tc_alloc_tmp(tc); 468 tc_ADD(tc, tmp, coords, inst->tex.offsets[0]); 469 coords = tsrc_from(tmp); 470 } 471 472 sampler_src = 1; 473 break; 474 case TOY_OPCODE_TGSI_TXQ: 475 msg_type = GEN6_MSG_SAMPLER_RESINFO; 476 num_coords = 0; 477 bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X); 478 break; 479 case TOY_OPCODE_TGSI_TXQ_LZ: 480 msg_type = GEN6_MSG_SAMPLER_RESINFO; 481 num_coords = 0; 482 sampler_src = 0; 483 break; 484 case TOY_OPCODE_TGSI_TXL2: 485 if (ref_pos >= 0) { 486 assert(ref_pos < 4); 487 488 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; 489 ref_or_si = tsrc_swizzle1(coords, ref_pos); 490 } 491 else { 492 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; 493 } 494 495 bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X); 496 sampler_src = 2; 497 break; 498 default: 499 assert(!"unhandled sampling opcode"); 500 if (ret_sampler_index) 501 *ret_sampler_index = 0; 502 return tsrc_null(); 503 break; 504 } 505 506 assert(inst->src[sampler_src].file == TOY_FILE_IMM); 507 sampler_index = inst->src[sampler_src].val32; 508 binding_table_index = vcc->shader->bt.tex_base + sampler_index; 509 510 /* 511 * From the Sandy Bridge PRM, volume 4 part 1, page 18: 512 * 513 * "Note that the (cube map) coordinates delivered to the sampling 514 * engine must already have been divided by the component with the 515 * largest absolute value." 516 */ 517 switch (inst->tex.target) { 518 case TGSI_TEXTURE_CUBE: 519 case TGSI_TEXTURE_SHADOWCUBE: 520 case TGSI_TEXTURE_CUBE_ARRAY: 521 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 522 /* TXQ does not need coordinates */ 523 if (num_coords >= 3) { 524 struct toy_dst tmp, max; 525 struct toy_src abs_coords[3]; 526 unsigned i; 527 528 tmp = tc_alloc_tmp(tc); 529 max = tdst_writemask(tmp, TOY_WRITEMASK_W); 530 531 for (i = 0; i < 3; i++) 532 abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i)); 533 534 tc_SEL(tc, max, abs_coords[0], abs_coords[0], GEN6_COND_GE); 535 tc_SEL(tc, max, tsrc_from(max), abs_coords[0], GEN6_COND_GE); 536 tc_INV(tc, max, tsrc_from(max)); 537 538 for (i = 0; i < 3; i++) 539 tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max)); 540 541 coords = tsrc_from(tmp); 542 } 543 break; 544 } 545 546 /* set up sampler parameters */ 547 msg_len = vs_add_sampler_params(tc, msg_type, base_mrf, 548 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); 549 550 /* 551 * From the Sandy Bridge PRM, volume 4 part 1, page 136: 552 * 553 * "The maximum message length allowed to the sampler is 11. This would 554 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of 555 * SIMD16." 556 */ 557 if (msg_len > 11) 558 tc_fail(tc, "maximum length for messages to the sampler is 11"); 559 560 if (ret_sampler_index) 561 *ret_sampler_index = sampler_index; 562 563 return tsrc_imm_mdesc_sampler(tc, msg_len, 1, 564 false, simd_mode, msg_type, sampler_index, binding_table_index); 565 } 566 567 static void 568 vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc, 569 struct toy_inst *inst) 570 { 571 struct toy_compiler *tc = &vcc->tc; 572 struct toy_src desc; 573 struct toy_dst dst, tmp; 574 unsigned sampler_index; 575 int swizzles[4], i; 576 unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask; 577 bool need_filter; 578 579 desc = vs_prepare_tgsi_sampling(vcc, inst, 580 vcc->first_free_mrf, &sampler_index); 581 582 switch (inst->opcode) { 583 case TOY_OPCODE_TGSI_TXF: 584 case TOY_OPCODE_TGSI_TXQ: 585 case TOY_OPCODE_TGSI_TXQ_LZ: 586 need_filter = false; 587 break; 588 default: 589 need_filter = true; 590 break; 591 } 592 593 toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER); 594 inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0); 595 inst->src[1] = desc; 596 597 /* write to a temp first */ 598 tmp = tc_alloc_tmp(tc); 599 tmp.type = inst->dst.type; 600 dst = inst->dst; 601 inst->dst = tmp; 602 603 tc_move_inst(tc, inst); 604 605 if (need_filter) { 606 assert(sampler_index < vcc->variant->num_sampler_views); 607 swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r; 608 swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g; 609 swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b; 610 swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a; 611 } 612 else { 613 swizzles[0] = PIPE_SWIZZLE_X; 614 swizzles[1] = PIPE_SWIZZLE_Y; 615 swizzles[2] = PIPE_SWIZZLE_Z; 616 swizzles[3] = PIPE_SWIZZLE_W; 617 } 618 619 swizzle_zero_mask = 0; 620 swizzle_one_mask = 0; 621 swizzle_normal_mask = 0; 622 for (i = 0; i < 4; i++) { 623 switch (swizzles[i]) { 624 case PIPE_SWIZZLE_0: 625 swizzle_zero_mask |= 1 << i; 626 swizzles[i] = i; 627 break; 628 case PIPE_SWIZZLE_1: 629 swizzle_one_mask |= 1 << i; 630 swizzles[i] = i; 631 break; 632 default: 633 swizzle_normal_mask |= 1 << i; 634 break; 635 } 636 } 637 638 /* swizzle the results */ 639 if (swizzle_normal_mask) { 640 tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask), 641 tsrc_swizzle(tsrc_from(tmp), swizzles[0], 642 swizzles[1], swizzles[2], swizzles[3])); 643 } 644 if (swizzle_zero_mask) 645 tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f)); 646 if (swizzle_one_mask) 647 tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f)); 648 } 649 650 static void 651 vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst) 652 { 653 /* vs_write_vue() has set up the message registers */ 654 toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB); 655 } 656 657 static void 658 vs_lower_virtual_opcodes(struct vs_compile_context *vcc) 659 { 660 struct toy_compiler *tc = &vcc->tc; 661 struct toy_inst *inst; 662 663 tc_head(tc); 664 while ((inst = tc_next(tc)) != NULL) { 665 switch (inst->opcode) { 666 case TOY_OPCODE_TGSI_IN: 667 case TOY_OPCODE_TGSI_CONST: 668 case TOY_OPCODE_TGSI_SV: 669 case TOY_OPCODE_TGSI_IMM: 670 vs_lower_opcode_tgsi_direct(vcc, inst); 671 break; 672 case TOY_OPCODE_TGSI_INDIRECT_FETCH: 673 case TOY_OPCODE_TGSI_INDIRECT_STORE: 674 vs_lower_opcode_tgsi_indirect(vcc, inst); 675 break; 676 case TOY_OPCODE_TGSI_TEX: 677 case TOY_OPCODE_TGSI_TXB: 678 case TOY_OPCODE_TGSI_TXD: 679 case TOY_OPCODE_TGSI_TXL: 680 case TOY_OPCODE_TGSI_TXP: 681 case TOY_OPCODE_TGSI_TXF: 682 case TOY_OPCODE_TGSI_TXQ: 683 case TOY_OPCODE_TGSI_TXQ_LZ: 684 case TOY_OPCODE_TGSI_TEX2: 685 case TOY_OPCODE_TGSI_TXB2: 686 case TOY_OPCODE_TGSI_TXL2: 687 case TOY_OPCODE_TGSI_SAMPLE: 688 case TOY_OPCODE_TGSI_SAMPLE_I: 689 case TOY_OPCODE_TGSI_SAMPLE_I_MS: 690 case TOY_OPCODE_TGSI_SAMPLE_B: 691 case TOY_OPCODE_TGSI_SAMPLE_C: 692 case TOY_OPCODE_TGSI_SAMPLE_C_LZ: 693 case TOY_OPCODE_TGSI_SAMPLE_D: 694 case TOY_OPCODE_TGSI_SAMPLE_L: 695 case TOY_OPCODE_TGSI_GATHER4: 696 case TOY_OPCODE_TGSI_SVIEWINFO: 697 case TOY_OPCODE_TGSI_SAMPLE_POS: 698 case TOY_OPCODE_TGSI_SAMPLE_INFO: 699 vs_lower_opcode_tgsi_sampling(vcc, inst); 700 break; 701 case TOY_OPCODE_INV: 702 case TOY_OPCODE_LOG: 703 case TOY_OPCODE_EXP: 704 case TOY_OPCODE_SQRT: 705 case TOY_OPCODE_RSQ: 706 case TOY_OPCODE_SIN: 707 case TOY_OPCODE_COS: 708 case TOY_OPCODE_FDIV: 709 case TOY_OPCODE_POW: 710 case TOY_OPCODE_INT_DIV_QUOTIENT: 711 case TOY_OPCODE_INT_DIV_REMAINDER: 712 toy_compiler_lower_math(tc, inst); 713 break; 714 case TOY_OPCODE_URB_WRITE: 715 vs_lower_opcode_urb_write(tc, inst); 716 break; 717 default: 718 if (inst->opcode > 127) 719 tc_fail(tc, "unhandled virtual opcode"); 720 break; 721 } 722 } 723 } 724 725 /** 726 * Compile the shader. 727 */ 728 static bool 729 vs_compile(struct vs_compile_context *vcc) 730 { 731 struct toy_compiler *tc = &vcc->tc; 732 struct ilo_shader *sh = vcc->shader; 733 734 vs_lower_virtual_opcodes(vcc); 735 toy_compiler_legalize_for_ra(tc); 736 toy_compiler_optimize(tc); 737 toy_compiler_allocate_registers(tc, 738 vcc->first_free_grf, 739 vcc->last_free_grf, 740 vcc->num_grf_per_vrf); 741 toy_compiler_legalize_for_asm(tc); 742 743 if (tc->fail) { 744 ilo_err("failed to legalize VS instructions: %s\n", tc->reason); 745 return false; 746 } 747 748 if (ilo_debug & ILO_DEBUG_VS) { 749 ilo_printf("legalized instructions:\n"); 750 toy_compiler_dump(tc); 751 ilo_printf("\n"); 752 } 753 754 if (true) { 755 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); 756 } 757 else { 758 static const uint32_t microcode[] = { 759 /* fill in the microcode here */ 760 0x0, 0x0, 0x0, 0x0, 761 }; 762 const bool swap = true; 763 764 sh->kernel_size = sizeof(microcode); 765 sh->kernel = MALLOC(sh->kernel_size); 766 767 if (sh->kernel) { 768 const int num_dwords = sizeof(microcode) / 4; 769 const uint32_t *src = microcode; 770 uint32_t *dst = (uint32_t *) sh->kernel; 771 int i; 772 773 for (i = 0; i < num_dwords; i += 4) { 774 if (swap) { 775 dst[i + 0] = src[i + 3]; 776 dst[i + 1] = src[i + 2]; 777 dst[i + 2] = src[i + 1]; 778 dst[i + 3] = src[i + 0]; 779 } 780 else { 781 memcpy(dst, src, 16); 782 } 783 } 784 } 785 } 786 787 if (!sh->kernel) { 788 ilo_err("failed to compile VS: %s\n", tc->reason); 789 return false; 790 } 791 792 if (ilo_debug & ILO_DEBUG_VS) { 793 ilo_printf("disassembly:\n"); 794 toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false); 795 ilo_printf("\n"); 796 } 797 798 return true; 799 } 800 801 /** 802 * Collect the toy registers to be written to the VUE. 803 */ 804 static int 805 vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs) 806 { 807 const struct toy_tgsi *tgsi = &vcc->tgsi; 808 unsigned i; 809 810 for (i = 0; i < vcc->shader->out.count; i++) { 811 const int slot = vcc->output_map[i]; 812 const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi, 813 TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1; 814 struct toy_src src; 815 816 if (vrf >= 0) { 817 struct toy_dst dst; 818 819 dst = tdst(TOY_FILE_VRF, vrf, 0); 820 src = tsrc_from(dst); 821 822 if (i == 0) { 823 /* PSIZE is at channel W */ 824 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W), 825 tsrc_swizzle1(src, TOY_SWIZZLE_X)); 826 827 /* the other channels are for the header */ 828 dst = tdst_d(dst); 829 tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ), 830 tsrc_imm_d(0)); 831 } 832 else { 833 /* initialize unused channels to 0.0f */ 834 if (tgsi->outputs[slot].undefined_mask) { 835 dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask); 836 tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f)); 837 } 838 } 839 } 840 else { 841 /* XXX this is too ugly */ 842 if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST && 843 slot < 0) { 844 /* ok, we need to compute clip distance */ 845 int clipvert_slot = -1, clipvert_vrf, j; 846 847 for (j = 0; j < tgsi->num_outputs; j++) { 848 if (tgsi->outputs[j].semantic_name == 849 TGSI_SEMANTIC_CLIPVERTEX) { 850 clipvert_slot = j; 851 break; 852 } 853 else if (tgsi->outputs[j].semantic_name == 854 TGSI_SEMANTIC_POSITION) { 855 /* remember pos, but keep looking */ 856 clipvert_slot = j; 857 } 858 } 859 860 clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi, 861 TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1; 862 if (clipvert_vrf >= 0) { 863 struct toy_dst tmp = tc_alloc_tmp(&vcc->tc); 864 struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0); 865 int first_ucp, last_ucp; 866 867 if (vcc->shader->out.semantic_indices[i]) { 868 first_ucp = 4; 869 last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1); 870 } 871 else { 872 first_ucp = 0; 873 last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1); 874 } 875 876 for (j = first_ucp; j <= last_ucp; j++) { 877 const int plane_grf = vcc->first_ucp_grf + j / 2; 878 const int plane_subreg = (j & 1) * 16; 879 const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF, 880 plane_grf, plane_subreg), TOY_RECT_041); 881 const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j); 882 883 tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask), 884 clipvert, plane); 885 } 886 887 src = tsrc_from(tmp); 888 } 889 else { 890 src = tsrc_imm_f(0.0f); 891 } 892 } 893 else { 894 src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f); 895 } 896 } 897 898 outs[i] = src; 899 } 900 901 return i; 902 } 903 904 /** 905 * Emit instructions to write the VUE. 906 */ 907 static void 908 vs_write_vue(struct vs_compile_context *vcc) 909 { 910 struct toy_compiler *tc = &vcc->tc; 911 struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS]; 912 struct toy_dst header; 913 struct toy_src r0; 914 struct toy_inst *inst; 915 int sent_attrs, total_attrs; 916 917 header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); 918 r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); 919 inst = tc_MOV(tc, header, r0); 920 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 921 922 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 923 inst = tc_OR(tc, tdst_offset(header, 0, 5), 924 tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010), 925 tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010)); 926 inst->exec_size = GEN6_EXECSIZE_1; 927 inst->access_mode = GEN6_ALIGN_1; 928 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 929 } 930 931 total_attrs = vs_collect_outputs(vcc, outs); 932 sent_attrs = 0; 933 while (sent_attrs < total_attrs) { 934 struct toy_src desc; 935 int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs; 936 int num_attrs, msg_len, i; 937 bool eot; 938 939 num_attrs = total_attrs - sent_attrs; 940 eot = true; 941 942 /* see if we need another message */ 943 avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1; 944 if (num_attrs > avail_mrf_for_attrs) { 945 /* 946 * From the Sandy Bridge PRM, volume 4 part 2, page 22: 947 * 948 * "Offset. This field specifies a destination offset (in 256-bit 949 * units) from the start of the URB entry(s), as referenced by 950 * URB Return Handle n, at which the data (if any) will be 951 * written." 952 * 953 * As we need to offset the following messages, we must make sure 954 * this one writes an even number of attributes. 955 */ 956 num_attrs = avail_mrf_for_attrs & ~1; 957 eot = false; 958 } 959 960 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 961 /* do not forget about the header */ 962 msg_len = 1 + num_attrs; 963 } 964 else { 965 /* 966 * From the Sandy Bridge PRM, volume 4 part 2, page 26: 967 * 968 * "At least 256 bits per vertex (512 bits total, M1 & M2) must 969 * be written. Writing only 128 bits per vertex (256 bits 970 * total, M1 only) results in UNDEFINED operation." 971 * 972 * "[DevSNB] Interleave writes must be in multiples of 256 per 973 * vertex." 974 * 975 * That is, we must write or appear to write an even number of 976 * attributes, starting from two. 977 */ 978 if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) { 979 num_attrs--; 980 eot = false; 981 } 982 983 msg_len = 1 + align(num_attrs, 2); 984 } 985 986 for (i = 0; i < num_attrs; i++) 987 tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]); 988 989 assert(sent_attrs % 2 == 0); 990 desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0, 991 eot, true, false, true, sent_attrs / 2, 0); 992 993 tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc); 994 995 sent_attrs += num_attrs; 996 } 997 } 998 999 /** 1000 * Set up shader inputs for fixed-function units. 1001 */ 1002 static void 1003 vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi) 1004 { 1005 int num_attrs, i; 1006 1007 /* vertex/instance id is the first VE if exists */ 1008 for (i = 0; i < tgsi->num_system_values; i++) { 1009 bool found = false; 1010 1011 switch (tgsi->system_values[i].semantic_name) { 1012 case TGSI_SEMANTIC_INSTANCEID: 1013 case TGSI_SEMANTIC_VERTEXID: 1014 found = true; 1015 break; 1016 default: 1017 break; 1018 } 1019 1020 if (found) { 1021 sh->in.semantic_names[sh->in.count] = 1022 tgsi->system_values[i].semantic_name; 1023 sh->in.semantic_indices[sh->in.count] = 1024 tgsi->system_values[i].semantic_index; 1025 sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT; 1026 sh->in.centroid[sh->in.count] = false; 1027 1028 sh->in.count++; 1029 break; 1030 } 1031 } 1032 1033 num_attrs = 0; 1034 for (i = 0; i < tgsi->num_inputs; i++) { 1035 assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC); 1036 if (tgsi->inputs[i].semantic_index >= num_attrs) 1037 num_attrs = tgsi->inputs[i].semantic_index + 1; 1038 } 1039 assert(num_attrs <= PIPE_MAX_ATTRIBS); 1040 1041 /* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */ 1042 for (i = 0; i < num_attrs; i++) { 1043 sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC; 1044 sh->in.semantic_indices[sh->in.count + i] = i; 1045 sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT; 1046 sh->in.centroid[sh->in.count + i] = false; 1047 } 1048 1049 sh->in.count += num_attrs; 1050 1051 sh->in.has_pos = false; 1052 sh->in.has_linear_interp = false; 1053 sh->in.barycentric_interpolation_mode = 0; 1054 } 1055 1056 /** 1057 * Set up shader outputs for fixed-function units. 1058 */ 1059 static void 1060 vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi, 1061 bool output_clipdist, int *output_map) 1062 { 1063 int psize_slot = -1, pos_slot = -1; 1064 int clipdist_slot[2] = { -1, -1 }; 1065 int color_slot[4] = { -1, -1, -1, -1 }; 1066 int num_outs, i; 1067 1068 /* find out the slots of outputs that need special care */ 1069 for (i = 0; i < tgsi->num_outputs; i++) { 1070 switch (tgsi->outputs[i].semantic_name) { 1071 case TGSI_SEMANTIC_PSIZE: 1072 psize_slot = i; 1073 break; 1074 case TGSI_SEMANTIC_POSITION: 1075 pos_slot = i; 1076 break; 1077 case TGSI_SEMANTIC_CLIPDIST: 1078 if (tgsi->outputs[i].semantic_index) 1079 clipdist_slot[1] = i; 1080 else 1081 clipdist_slot[0] = i; 1082 break; 1083 case TGSI_SEMANTIC_COLOR: 1084 if (tgsi->outputs[i].semantic_index) 1085 color_slot[2] = i; 1086 else 1087 color_slot[0] = i; 1088 break; 1089 case TGSI_SEMANTIC_BCOLOR: 1090 if (tgsi->outputs[i].semantic_index) 1091 color_slot[3] = i; 1092 else 1093 color_slot[1] = i; 1094 break; 1095 default: 1096 break; 1097 } 1098 } 1099 1100 /* the first two VUEs are always PSIZE and POSITION */ 1101 num_outs = 2; 1102 output_map[0] = psize_slot; 1103 output_map[1] = pos_slot; 1104 1105 sh->out.register_indices[0] = 1106 (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1; 1107 sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE; 1108 sh->out.semantic_indices[0] = 0; 1109 1110 sh->out.register_indices[1] = 1111 (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1; 1112 sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION; 1113 sh->out.semantic_indices[1] = 0; 1114 1115 sh->out.has_pos = true; 1116 1117 /* followed by optional clip distances */ 1118 if (output_clipdist) { 1119 sh->out.register_indices[num_outs] = 1120 (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1; 1121 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST; 1122 sh->out.semantic_indices[num_outs] = 0; 1123 output_map[num_outs++] = clipdist_slot[0]; 1124 1125 sh->out.register_indices[num_outs] = 1126 (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1; 1127 sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST; 1128 sh->out.semantic_indices[num_outs] = 1; 1129 output_map[num_outs++] = clipdist_slot[1]; 1130 } 1131 1132 /* 1133 * make BCOLOR follow COLOR so that we can make use of 1134 * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF 1135 */ 1136 for (i = 0; i < 4; i++) { 1137 const int slot = color_slot[i]; 1138 1139 if (slot < 0) 1140 continue; 1141 1142 sh->out.register_indices[num_outs] = tgsi->outputs[slot].index; 1143 sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name; 1144 sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index; 1145 1146 output_map[num_outs++] = slot; 1147 } 1148 1149 /* add the rest of the outputs */ 1150 for (i = 0; i < tgsi->num_outputs; i++) { 1151 switch (tgsi->outputs[i].semantic_name) { 1152 case TGSI_SEMANTIC_PSIZE: 1153 case TGSI_SEMANTIC_POSITION: 1154 case TGSI_SEMANTIC_CLIPDIST: 1155 case TGSI_SEMANTIC_COLOR: 1156 case TGSI_SEMANTIC_BCOLOR: 1157 break; 1158 default: 1159 sh->out.register_indices[num_outs] = tgsi->outputs[i].index; 1160 sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name; 1161 sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index; 1162 output_map[num_outs++] = i; 1163 break; 1164 } 1165 } 1166 1167 sh->out.count = num_outs; 1168 } 1169 1170 /** 1171 * Translate the TGSI tokens. 1172 */ 1173 static bool 1174 vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, 1175 struct toy_tgsi *tgsi) 1176 { 1177 if (ilo_debug & ILO_DEBUG_VS) { 1178 ilo_printf("dumping vertex shader\n"); 1179 ilo_printf("\n"); 1180 1181 tgsi_dump(tokens, 0); 1182 ilo_printf("\n"); 1183 } 1184 1185 toy_compiler_translate_tgsi(tc, tokens, true, tgsi); 1186 if (tc->fail) { 1187 ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason); 1188 return false; 1189 } 1190 1191 if (ilo_debug & ILO_DEBUG_VS) { 1192 ilo_printf("TGSI translator:\n"); 1193 toy_tgsi_dump(tgsi); 1194 ilo_printf("\n"); 1195 toy_compiler_dump(tc); 1196 ilo_printf("\n"); 1197 } 1198 1199 return true; 1200 } 1201 1202 /** 1203 * Set up VS compile context. This includes translating the TGSI tokens. 1204 */ 1205 static bool 1206 vs_setup(struct vs_compile_context *vcc, 1207 const struct ilo_shader_state *state, 1208 const struct ilo_shader_variant *variant) 1209 { 1210 int num_consts; 1211 1212 memset(vcc, 0, sizeof(*vcc)); 1213 1214 vcc->shader = CALLOC_STRUCT(ilo_shader); 1215 if (!vcc->shader) 1216 return false; 1217 1218 vcc->variant = variant; 1219 1220 toy_compiler_init(&vcc->tc, state->info.dev); 1221 vcc->tc.templ.access_mode = GEN6_ALIGN_16; 1222 vcc->tc.templ.exec_size = GEN6_EXECSIZE_8; 1223 vcc->tc.rect_linear_width = 4; 1224 1225 /* 1226 * The classic driver uses the sampler cache (gen6) or the data cache 1227 * (gen7). Why? 1228 */ 1229 vcc->const_cache = GEN6_SFID_DP_CC; 1230 1231 if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) { 1232 toy_compiler_cleanup(&vcc->tc); 1233 FREE(vcc->shader); 1234 return false; 1235 } 1236 1237 vs_setup_shader_in(vcc->shader, &vcc->tgsi); 1238 vs_setup_shader_out(vcc->shader, &vcc->tgsi, 1239 (vcc->variant->u.vs.num_ucps > 0), vcc->output_map); 1240 1241 if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) { 1242 num_consts = (vcc->tgsi.const_count + 1) / 2; 1243 1244 /* 1245 * From the Sandy Bridge PRM, volume 2 part 1, page 138: 1246 * 1247 * "The sum of all four read length fields (each incremented to 1248 * represent the actual read length) must be less than or equal to 1249 * 32" 1250 */ 1251 if (num_consts > 32) 1252 num_consts = 0; 1253 } 1254 else { 1255 num_consts = 0; 1256 } 1257 1258 vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts); 1259 vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); 1260 1261 /* r0 is reserved for payload header */ 1262 vcc->first_const_grf = 1; 1263 vcc->first_ucp_grf = vcc->first_const_grf + num_consts; 1264 1265 /* fit each pair of user clip planes into a register */ 1266 vcc->first_vue_grf = vcc->first_ucp_grf + 1267 (vcc->variant->u.vs.num_ucps + 1) / 2; 1268 1269 vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count; 1270 vcc->last_free_grf = 127; 1271 1272 /* m0 is reserved for system routines */ 1273 vcc->first_free_mrf = 1; 1274 vcc->last_free_mrf = 15; 1275 1276 vcc->num_grf_per_vrf = 1; 1277 1278 if (ilo_dev_gen(vcc->tc.dev) >= ILO_GEN(7)) { 1279 vcc->last_free_grf -= 15; 1280 vcc->first_free_mrf = vcc->last_free_grf + 1; 1281 vcc->last_free_mrf = vcc->first_free_mrf + 14; 1282 } 1283 1284 vcc->shader->in.start_grf = vcc->first_const_grf; 1285 vcc->shader->pcb.clip_state_size = 1286 vcc->variant->u.vs.num_ucps * (sizeof(float) * 4); 1287 1288 vcc->shader->bt.tex_base = 0; 1289 vcc->shader->bt.tex_count = vcc->variant->num_sampler_views; 1290 1291 vcc->shader->bt.const_base = vcc->shader->bt.tex_base + 1292 vcc->shader->bt.tex_count; 1293 vcc->shader->bt.const_count = state->info.constant_buffer_count; 1294 1295 vcc->shader->bt.total_count = vcc->shader->bt.const_base + 1296 vcc->shader->bt.const_count; 1297 1298 return true; 1299 } 1300 1301 /** 1302 * Compile the vertex shader. 1303 */ 1304 struct ilo_shader * 1305 ilo_shader_compile_vs(const struct ilo_shader_state *state, 1306 const struct ilo_shader_variant *variant) 1307 { 1308 struct vs_compile_context vcc; 1309 bool need_gs; 1310 1311 if (!vs_setup(&vcc, state, variant)) 1312 return NULL; 1313 1314 if (ilo_dev_gen(vcc.tc.dev) >= ILO_GEN(7)) { 1315 need_gs = false; 1316 } 1317 else { 1318 need_gs = variant->u.vs.rasterizer_discard || 1319 state->info.stream_output.num_outputs; 1320 } 1321 1322 vs_write_vue(&vcc); 1323 1324 if (!vs_compile(&vcc)) { 1325 FREE(vcc.shader); 1326 vcc.shader = NULL; 1327 } 1328 1329 toy_tgsi_cleanup(&vcc.tgsi); 1330 toy_compiler_cleanup(&vcc.tc); 1331 1332 if (need_gs) { 1333 int so_mapping[PIPE_MAX_SHADER_OUTPUTS]; 1334 int i, j; 1335 1336 for (i = 0; i < vcc.tgsi.num_outputs; i++) { 1337 int attr = 0; 1338 1339 for (j = 0; j < vcc.shader->out.count; j++) { 1340 if (vcc.tgsi.outputs[i].semantic_name == 1341 vcc.shader->out.semantic_names[j] && 1342 vcc.tgsi.outputs[i].semantic_index == 1343 vcc.shader->out.semantic_indices[j]) { 1344 attr = j; 1345 break; 1346 } 1347 } 1348 1349 so_mapping[i] = attr; 1350 } 1351 1352 if (!ilo_shader_compile_gs_passthrough(state, variant, 1353 so_mapping, vcc.shader)) { 1354 ilo_shader_destroy_kernel(vcc.shader); 1355 vcc.shader = NULL; 1356 } 1357 } 1358 1359 return vcc.shader; 1360 } 1361