1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2013 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "tgsi/tgsi_dump.h" 29 #include "toy_compiler.h" 30 #include "toy_tgsi.h" 31 #include "toy_legalize.h" 32 #include "toy_optimize.h" 33 #include "toy_helpers.h" 34 #include "ilo_shader_internal.h" 35 36 /* XXX Below is proof-of-concept code. Skip this file! */ 37 38 /* 39 * TODO 40 * - primitive id is in r0.1. FS receives PID as a flat attribute. 
 * - set VUE header m0.1 for layered rendering
 */
/* All state needed while compiling one GS: inputs, toy-compiler state,
 * payload/VUE layout, and the variables used by the lowering passes. */
struct gs_compile_context {
   struct ilo_shader *shader;
   const struct ilo_shader_variant *variant;
   const struct pipe_stream_output_info *so_info;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   /* maps fixed-function output slots back to TGSI output slots (-1 if none) */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   bool write_so;    /* stream output is active */
   bool write_vue;   /* vertices are fed to the rest of the pipeline
                        (i.e., no rasterizer discard) */

   int in_vue_size;  /* size of one input VUE, in GRFs */
   int in_vue_count; /* vertices per input primitive (1/2/3/4/6) */

   int out_vue_size;
   int out_vue_min_count; /* vertices needed to complete one output prim */

   /* true when vertex/prim counts are known at compile time */
   bool is_static;

   struct {
      struct toy_src header; /* r0 */
      struct toy_src svbi;   /* r1, only when write_so */
      struct toy_src vues[6];
   } payload;

   struct {
      struct toy_dst urb_write_header;
      bool prim_start;
      bool prim_end;
      int prim_type;

      struct toy_dst tmp;

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written;
      struct toy_dst so_index;

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   /* counters kept in registers when counts are not known statically */
   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   struct {
      int total_vertices;
      int total_prims;
      /* one bit per emitted vertex (8 x 32 bits), so at most 256 vertices */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};

/* Emit a full-register (8-channel) raw MOV, ignoring the execution mask. */
static void
gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc, dst, src);
   inst->exec_size = GEN6_EXECSIZE_8;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
}

/* Emit a 4-channel MOV from src channel src_ch to dst channel dst_ch,
 * ignoring the execution mask. */
static void
gs_COPY4(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc,
         tdst_offset(dst, 0, dst_ch),
         tsrc_offset(src, 0, src_ch));
   inst->exec_size = GEN6_EXECSIZE_4;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
}

/* Emit a scalar (1-channel) MOV of a single component, ignoring the
 * execution mask.  The <0;1,0> rect makes the source a scalar broadcast. */
static void
gs_COPY1(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc,
         tdst_offset(dst, 0, dst_ch),
         tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010));
   inst->exec_size = GEN6_EXECSIZE_1;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
}

/* Initialize the per-shader variables: seed the URB_WRITE header from the
 * thread payload, reset primitive-tracking state, and pick the output
 * primitive type from the output vertex count. */
static void
gs_init_vars(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst dst;

   /* init URB_WRITE header */
   dst = gcc->vars.urb_write_header;

   gs_COPY8(tc, dst, gcc->payload.header);

   gcc->vars.prim_start = true;
   gcc->vars.prim_end = false;
   switch (gcc->out_vue_min_count) {
   case 1:
      gcc->vars.prim_type = GEN6_3DPRIM_POINTLIST;
      break;
   case 2:
      gcc->vars.prim_type = GEN6_3DPRIM_LINESTRIP;
      break;
   case 3:
      gcc->vars.prim_type = GEN6_3DPRIM_TRISTRIP;
      break;
   }

   if (gcc->write_so)
      tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
}

/* Buffer the current TGSI outputs into the ring of vars.buffers so that
 * earlier vertices of an unfinished primitive remain available for SO. */
static void
gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur];
   int i;

   for (i = 0; i < gcc->shader->out.count; i++)
      tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]);

   /* advance the cursor */
   gcc->vars.buffer_cur++;
   gcc->vars.buffer_cur %= gcc->vars.buffer_needed;
}

/* Emit a DataPort streamed-vertex-buffer write of one output ("out") to
 * the SO buffer selected by binding_table_index, at element "index".
 * When send_write_commit_message is set, the send requests (and its
 * destination receives) a write commit. */
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_MSG_DP_SVB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_DP_RC);
}

/* Write num_outs output attributes to the URB entry addressed by
 * msg_header, two attributes (8 dwords) per MRF, splitting into multiple
 * URB_WRITE messages when the outputs do not fit in the free MRFs.  Only
 * the final (complete) write returns a handle into dst; eot additionally
 * terminates the thread on that final write. */
static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         num_entries = mrf_avail;
         complete = false;
      }

      /* NOTE(review): each entry is addressed as mrf + i / 2 while mrf is
       * also incremented every iteration, which advances the write position
       * faster than i / 2 alone — looks suspicious; confirm against the
       * intended MRF layout.  Also the bound below uses
       * gcc->shader->out.count rather than num_outs — verify they are
       * always equal for every caller. */
      for (i = 0; i < num_entries; i++) {
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               false, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               false, sent, 0);
      }

      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}

/* Emit the FF_SYNC message: report SO vertex/prim counts and the number of
 * GS prims generated, and allocate the initial URB handle into dst. */
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         /* compute (num_prims * in_vue_count) << 16 | num_prims at runtime */
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry.  This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output.  This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         false, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, GEN6_SFID_URB);
}

/* Release the URB handle without writing any vertex (used when the VUE is
 * never fed to the pipeline, e.g. rasterizer discard), ending the thread. */
static void
gs_discard(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header));

   desc = tsrc_imm_mdesc_urb(tc,
         true, 1, 0, true, false, false,
         false, 0, 0);

   tc_add2(tc, TOY_OPCODE_URB_WRITE,
         tdst_null(), tsrc_from(mrf_header), desc);
}

/* Lower TOY_OPCODE_ENDPRIM.  Currently a no-op: in the static path the
 * PrimEnd bit is derived in get_num_prims_static() instead. */
static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if has control flow, set PrimEnd on the last vertex and URB_WRITE */
}

/* Lower EMIT to URB_WRITEs when counts are dynamic — unimplemented. */
static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  we can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the later
    * patching the former.
    */
}

/* Lower EMIT to SO writes when counts are dynamic — only the index math is
 * in place; the actual writes are unimplemented. */
static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         GEN6_COND_GE);

   {
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}

/* Static-count EMIT: write the current vertex's VUE.  Sets PrimStart/
 * PrimEnd/PrimType in the URB_WRITE header; the write on the last vertex
 * carries EOT and (when SO is active) SONumPrimsWritten. */
static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

   /* PrimEnd is set when this vertex's bit is set in last_vertex[] */
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      /* merge SONumPrimsWritten (high word) with the prim control bits */
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = GEN6_EXECSIZE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   if (!eot) {
      /* the returned handle addresses the next URB entry */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}

/* Static-count EMIT: when a primitive completes, stream out all of its
 * vertices (buffered ones plus the current one), guarded by an SVBI bounds
 * check. */
static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   /* tmp = so_index + {0, 1, 2, 3}: per-vertex destination indices */
   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = GEN6_EXECSIZE_8;
   inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;

   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   /* only write if the last index is within MaxVertexIndex (SVBI) */
   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         GEN6_COND_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            if (i == gcc->out_vue_min_count - 1) {
               /* the current vertex is still live in tgsi_outs */
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to DAP
             *      prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);

            binding_table_index = gcc->shader->bt.gen6_so_base + j;

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination register,
             *      but merely clears the dependency associated with the
             *      destination register.  Thus, a simple "mov" instruction
             *      using the register as a source is sufficient to wait for
             *      the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}

/* Lower one EMIT when vertex/prim counts are known at compile time. */
static void
gs_lower_opcode_emit_static(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   gcc->static_data.num_vertices++;
   gcc->static_data.num_vertices_in_prim++;

   if (gcc->write_so) {
      gs_lower_opcode_emit_so_static(gcc);

      /* no need to buffer the very last vertex */
      if (gcc->out_vue_min_count > 1 &&
          gcc->static_data.num_vertices != gcc->static_data.total_vertices)
         gs_save_output(gcc, gcc->vars.tgsi_outs);
   }

   if (gcc->write_vue)
      gs_lower_opcode_emit_vue_static(gcc);
}

/* Lower one EMIT when counts are only known at runtime (incomplete). */
static void
gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc,
                             struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;

   tc_ADD(tc, gcc->dynamic_data.num_vertices,
         tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1));
   tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim,
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1));

   if (gcc->write_so) {
      gs_lower_opcode_emit_so_dynamic(gcc);

      if (gcc->out_vue_min_count > 1)
         gs_save_output(gcc, gcc->vars.tgsi_outs);
   }

   if (gcc->write_vue)
      gs_lower_opcode_emit_vue_dynamic(gcc);
}

/* Dispatch EMIT lowering to the static or dynamic variant. */
static void
gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   if (gcc->is_static)
      gs_lower_opcode_emit_static(gcc, inst);
   else
      gs_lower_opcode_emit_dynamic(gcc, inst);
}

/* Lower a TGSI input fetch for vertex "dim", attribute "idx": locate the
 * attribute in the payload VUEs and MOV it to dst.  For triangles the
 * first two vertices are conditionally swapped to undo
 * GEN6_3DPRIM_TRISTRIP_REVERSE ordering. */
static void
gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_src attr;
   int slot, reg = -1, subreg;

   slot = toy_tgsi_find_input(&gcc->tgsi, idx);
   if (slot >= 0) {
      int i;

      /* attributes are packed two per GRF in the input VUE */
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         if (gcc->variant->u.gs.semantic_names[i] ==
               gcc->tgsi.inputs[slot].semantic_name &&
             gcc->variant->u.gs.semantic_indices[i] ==
               gcc->tgsi.inputs[slot].semantic_index) {
            reg = i / 2;
            subreg = (i % 2) * 4;
            break;
         }
      }
   }

   if (reg < 0) {
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* fix vertex ordering for GEN6_3DPRIM_TRISTRIP_REVERSE */
   if (gcc->in_vue_count == 3 && dim < 2) {
      struct toy_inst *inst;

      /* get PrimType */
      inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
            tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
      inst->exec_size = GEN6_EXECSIZE_1;
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
      inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);

      inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
            tsrc_imm_d(GEN6_3DPRIM_TRISTRIP_REVERSE), GEN6_COND_NZ);
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);

      /* predicated: non-reversed order */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;

      /* swap IN[0] and IN[1] for GEN6_3DPRIM_TRISTRIP_REVERSE */
      dim = !dim;

      /* inverted predicate: reversed order */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = GEN6_PREDCTRL_NORMAL;
      inst->pred_inv = true;
   }
   else {
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      tc_MOV(tc, dst, attr);
   }
}

/* Lower a TGSI immediate fetch: raw (bit-preserving) MOV of each channel. */
static void
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *imm;
   int ch;

   imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);

   for (ch = 0; ch < 4; ch++) {
      struct toy_inst *inst;

      /* raw moves */
      inst = tc_MOV(&gcc->tc,
            tdst_writemask(tdst_ud(dst), 1 << ch),
            tsrc_imm_ud(imm[ch]));
      inst->access_mode = GEN6_ALIGN_16;
   }
}

/* Lower direct (non-indirect) TGSI fetches.  For IN with dim 0, also fetch
 * the same attribute of all other input vertices so a later indirect access
 * would find them in VRFs. */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}

/* Two-pass lowering of all virtual opcodes: first the TGSI-level ones
 * (fetches, EMIT/ENDPRIM), then math and URB_WRITE down to real sends. */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB);
         break;
      default:
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}

/**
 * Count the vertices and primitives generated by this shader at compile
 * time, filling in gcc->static_data and the last_vertex bitmask.  Sets
 * gcc->is_static to false (and stops counting) when an EMIT or ENDPRIM
 * occurs inside control flow, i.e. the counts are unknown until runtime.
 */
static void
get_num_prims_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_inst *inst;
   int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0;
   bool is_static = true;

   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      switch (inst->opcode) {
      case GEN6_OPCODE_IF:
         if_depth++;
         break;
      case GEN6_OPCODE_ENDIF:
         if_depth--;
         break;
      case TOY_OPCODE_DO:
         do_depth++;
         break;
      case GEN6_OPCODE_WHILE:
         do_depth--;
         break;
      case TOY_OPCODE_EMIT:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            gcc->static_data.total_vertices++;

            num_vertices_in_prim++;
            if (num_vertices_in_prim >= gcc->out_vue_min_count)
               gcc->static_data.total_prims++;
         }
         break;
      case TOY_OPCODE_ENDPRIM:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            /* mark the most recently emitted vertex as a primitive end */
            const int vertidx = gcc->static_data.total_vertices - 1;
            const int idx = vertidx / 32;
            const int subidx = vertidx % 32;

            gcc->static_data.last_vertex[idx] |= 1 << subidx;
            num_vertices_in_prim = 0;
         }
         break;
      default:
         break;
      }

      if (!is_static)
         break;
   }

   gcc->is_static = is_static;
}

/**
 * Compile the shader.
 */
/* Compile a real (non-passthrough) GS.  Only the static-count path is
 * implemented: prologue (FF_SYNC + header setup) is prepended, virtual
 * opcodes are lowered, then the standard legalize/optimize/RA/assemble
 * pipeline runs.  Returns false on any failure. */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* insert the prologue before the translated body */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      /* FF_SYNC returns the initial URB handle in tmp.0 */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   /* with rasterizer discard, release the URB handle without writing */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}

/* Generate a passthrough GS (no TGSI tokens): copy each input vertex's
 * attributes straight from the payload VUEs and re-emit them as one
 * primitive, reusing the static EMIT/ENDPRIM lowering.  Used when the GS
 * stage exists only for stream output and/or rasterizer discard. */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   /* one primitive with as many vertices as the input primitive has */
   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         /* attributes are packed two per GRF in the input VUE */
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
      ilo_printf("\n");
   }

   return true;
}

/**
 * Translate the TGSI tokens.
998 */ 999 static bool 1000 gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, 1001 struct toy_tgsi *tgsi) 1002 { 1003 if (ilo_debug & ILO_DEBUG_GS) { 1004 ilo_printf("dumping geometry shader\n"); 1005 ilo_printf("\n"); 1006 1007 tgsi_dump(tokens, 0); 1008 ilo_printf("\n"); 1009 } 1010 1011 toy_compiler_translate_tgsi(tc, tokens, true, tgsi); 1012 if (tc->fail) 1013 return false; 1014 1015 if (ilo_debug & ILO_DEBUG_GS) { 1016 ilo_printf("TGSI translator:\n"); 1017 toy_tgsi_dump(tgsi); 1018 ilo_printf("\n"); 1019 toy_compiler_dump(tc); 1020 ilo_printf("\n"); 1021 } 1022 1023 return true; 1024 } 1025 1026 /** 1027 * Set up shader inputs for fixed-function units. 1028 */ 1029 static void 1030 gs_setup_shader_in(struct ilo_shader *sh, 1031 const struct ilo_shader_variant *variant) 1032 { 1033 int i; 1034 1035 for (i = 0; i < variant->u.gs.num_inputs; i++) { 1036 sh->in.semantic_names[i] = variant->u.gs.semantic_names[i]; 1037 sh->in.semantic_indices[i] = variant->u.gs.semantic_indices[i]; 1038 sh->in.interp[i] = TGSI_INTERPOLATE_CONSTANT; 1039 sh->in.centroid[i] = false; 1040 } 1041 1042 sh->in.count = variant->u.gs.num_inputs; 1043 1044 sh->in.has_pos = false; 1045 sh->in.has_linear_interp = false; 1046 sh->in.barycentric_interpolation_mode = 0; 1047 } 1048 1049 /** 1050 * Set up shader outputs for fixed-function units. 
 *
 * XXX share the code with VS
 */
/* Build the fixed-function VUE output layout: slot 0 is always PSIZE and
 * slot 1 POSITION, then optional clip distances, then COLOR/BCOLOR pairs,
 * then everything else.  output_map[] records, per VUE slot, the TGSI
 * output slot it came from (-1 when the shader does not write it). */
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         /* COLOR goes in even entries of color_slot[] ... */
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         /* ... and BCOLOR in odd entries, directly after its COLOR */
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}

/* Allocate GRFs for the compiler-internal variables (URB_WRITE header,
 * scratch, SO buffers/counters) and bind vars.tgsi_outs[] either to fresh
 * GRFs (no register mapping yet) or to the VRFs holding the TGSI outputs,
 * falling back to immediate zeros for unwritten outputs. */
static void
gs_setup_vars(struct gs_compile_context *gcc)
{
   int grf = gcc->first_free_grf;
   int i;

   gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
   grf++;

   gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
   grf++;

   if (gcc->write_so) {
      /* buffer all but the newest vertex of an output primitive */
      gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
      for (i = 0; i < gcc->vars.buffer_needed; i++) {
         gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
         grf += gcc->shader->out.count;
      }

      gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;

      gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   gcc->first_free_grf = grf;

   /* no translated TGSI (passthrough shader): use plain GRFs */
   if (!gcc->tgsi.reg_mapping) {
      for (i = 0; i < gcc->shader->out.count; i++)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);

      gcc->first_free_grf = grf;
      return;
   }

   for (i = 0; i < gcc->shader->out.count; i++) {
      const int slot = gcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
            TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;

      if (vrf >= 0)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
      else
         gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
   }
}

/* Describe the thread payload layout: r0 header, optional r1 SVBI (when
 * stream output is on), then one input VUE per vertex.  Records the first
 * and last GRFs available to the register allocator. */
static void
gs_setup_payload(struct gs_compile_context *gcc)
{
   int grf, i;

   grf = 0;

   /* r0: payload header */
   gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, grf, 0));
   grf++;

   /* r1: SVBI */
   if (gcc->write_so) {
      gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   /* URB data */
   gcc->shader->in.start_grf = grf;

   /* no pull constants */

   /* VUEs */
   for (i = 0; i < gcc->in_vue_count; i++) {
      gcc->payload.vues[i] = tsrc(TOY_FILE_GRF, grf, 0);
      grf += gcc->in_vue_size;
   }

   gcc->first_free_grf = grf;
   gcc->last_free_grf = 127;
}

/**
 * Set up GS compile context.  This includes translating the TGSI tokens.
/**
 * Set up GS compile context.  This includes translating the TGSI tokens.
 *
 * When \p state has no TGSI tokens, a passthrough context is set up
 * instead, taking the input/output vertex counts from \p num_verts and
 * the output semantics from \p variant.
 *
 * Returns false when the shader cannot be allocated or the tokens cannot
 * be translated; everything allocated so far is freed in that case.
 */
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   /* TGSI translation happens in ALIGN16 mode with 4-wide execution */
   gcc->tc.templ.access_mode = GEN6_ALIGN_16;
   gcc->tc.templ.exec_size = GEN6_EXECSIZE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      /* number of input VUEs per primitive, from the GS input type */
      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      /* minimum number of vertices needed to form an output primitive */
      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      int i;

      /* passthrough: outputs mirror the variant-provided input semantics */
      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   /* the code generated below runs in ALIGN1 mode */
   gcc->tc.templ.access_mode = GEN6_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   /* two VUE slots (vec4s) fit in one GRF */
   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   /* one binding table entry per SO output */
   gcc->shader->bt.gen6_so_base = 0;
   gcc->shader->bt.gen6_so_count = gcc->so_info->num_outputs;

   gcc->shader->bt.total_count = gcc->shader->bt.gen6_so_count;

   return true;
}

/**
 * Compile the geometry shader.
1360 */ 1361 struct ilo_shader * 1362 ilo_shader_compile_gs(const struct ilo_shader_state *state, 1363 const struct ilo_shader_variant *variant) 1364 { 1365 struct gs_compile_context gcc; 1366 1367 if (!gs_setup(&gcc, state, variant, 0)) 1368 return NULL; 1369 1370 if (!gs_compile(&gcc)) { 1371 FREE(gcc.shader); 1372 gcc.shader = NULL; 1373 } 1374 1375 toy_tgsi_cleanup(&gcc.tgsi); 1376 toy_compiler_cleanup(&gcc.tc); 1377 1378 return gcc.shader; 1379 } 1380 1381 static bool 1382 append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts) 1383 { 1384 void *combined; 1385 int gs_offset; 1386 1387 if (!gs) 1388 return false; 1389 1390 /* kernels must be aligned to 64-byte */ 1391 gs_offset = align(vs->kernel_size, 64); 1392 combined = REALLOC(vs->kernel, vs->kernel_size, 1393 gs_offset + gs->kernel_size); 1394 if (!combined) 1395 return false; 1396 1397 memcpy(combined + gs_offset, gs->kernel, gs->kernel_size); 1398 1399 vs->kernel = combined; 1400 vs->kernel_size = gs_offset + gs->kernel_size; 1401 1402 vs->stream_output = true; 1403 vs->gs_offsets[num_verts - 1] = gs_offset; 1404 vs->gs_start_grf = gs->in.start_grf; 1405 vs->gs_bt_so_count = gs->bt.gen6_so_count; 1406 1407 ilo_shader_destroy_kernel(gs); 1408 1409 return true; 1410 } 1411 1412 bool 1413 ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state, 1414 const struct ilo_shader_variant *vs_variant, 1415 const int *so_mapping, 1416 struct ilo_shader *vs) 1417 { 1418 struct gs_compile_context gcc; 1419 struct ilo_shader_state state; 1420 struct ilo_shader_variant variant; 1421 const int num_verts = 3; 1422 int i; 1423 1424 /* init GS state and variant */ 1425 state = *vs_state; 1426 state.info.tokens = NULL; 1427 for (i = 0; i < state.info.stream_output.num_outputs; i++) { 1428 const int reg = state.info.stream_output.output[i].register_index; 1429 1430 state.info.stream_output.output[i].register_index = so_mapping[reg]; 1431 } 1432 1433 variant = *vs_variant; 1434 
variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard; 1435 variant.u.gs.num_inputs = vs->out.count; 1436 for (i = 0; i < vs->out.count; i++) { 1437 variant.u.gs.semantic_names[i] = 1438 vs->out.semantic_names[i]; 1439 variant.u.gs.semantic_indices[i] = 1440 vs->out.semantic_indices[i]; 1441 } 1442 1443 if (!gs_setup(&gcc, &state, &variant, num_verts)) 1444 return false; 1445 1446 if (!gs_compile_passthrough(&gcc)) { 1447 FREE(gcc.shader); 1448 gcc.shader = NULL; 1449 } 1450 1451 /* no need to call toy_tgsi_cleanup() */ 1452 toy_compiler_cleanup(&gcc.tc); 1453 1454 return append_gs_to_vs(vs, gcc.shader, num_verts); 1455 } 1456