1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2013 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 
23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "tgsi/tgsi_dump.h" 29 #include "tgsi/tgsi_util.h" 30 #include "toy_compiler.h" 31 #include "toy_tgsi.h" 32 #include "toy_legalize.h" 33 #include "toy_optimize.h" 34 #include "toy_helpers.h" 35 #include "ilo_shader_internal.h" 36 37 struct fs_compile_context { 38 struct ilo_shader *shader; 39 const struct ilo_shader_variant *variant; 40 41 struct toy_compiler tc; 42 struct toy_tgsi tgsi; 43 44 int const_cache; 45 int dispatch_mode; 46 47 struct { 48 int interp_perspective_pixel; 49 int interp_perspective_centroid; 50 int interp_perspective_sample; 51 int interp_nonperspective_pixel; 52 int interp_nonperspective_centroid; 53 int interp_nonperspective_sample; 54 int source_depth; 55 int source_w; 56 int pos_offset; 57 } payloads[2]; 58 59 int first_const_grf; 60 int first_attr_grf; 61 int first_free_grf; 62 int last_free_grf; 63 64 int num_grf_per_vrf; 65 66 int first_free_mrf; 67 int last_free_mrf; 68 }; 69 70 static void 71 fetch_position(struct fs_compile_context *fcc, struct toy_dst dst) 72 { 73 struct toy_compiler *tc = &fcc->tc; 74 const struct toy_src src_z = 75 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0); 76 const struct toy_src src_w = 77 tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0); 78 const int fb_height = 79 (fcc->variant->u.fs.fb_height) ? 
fcc->variant->u.fs.fb_height : 1; 80 const bool origin_upper_left = 81 (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 82 const bool pixel_center_integer = 83 (fcc->tgsi.props.fs_coord_pixel_center == 84 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 85 struct toy_src subspan_x, subspan_y; 86 struct toy_dst tmp, tmp_uw; 87 struct toy_dst real_dst[4]; 88 89 tdst_transpose(dst, real_dst); 90 91 subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4)); 92 subspan_x = tsrc_rect(subspan_x, TOY_RECT_240); 93 94 subspan_y = tsrc_offset(subspan_x, 0, 1); 95 96 tmp_uw = tdst_uw(tc_alloc_tmp(tc)); 97 tmp = tc_alloc_tmp(tc); 98 99 /* X */ 100 tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010)); 101 tc_MOV(tc, tmp, tsrc_from(tmp_uw)); 102 if (pixel_center_integer) 103 tc_MOV(tc, real_dst[0], tsrc_from(tmp)); 104 else 105 tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f)); 106 107 /* Y */ 108 tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100)); 109 tc_MOV(tc, tmp, tsrc_from(tmp_uw)); 110 if (origin_upper_left && pixel_center_integer) { 111 tc_MOV(tc, real_dst[1], tsrc_from(tmp)); 112 } 113 else { 114 struct toy_src y = tsrc_from(tmp); 115 float offset = 0.0f; 116 117 if (!pixel_center_integer) 118 offset += 0.5f; 119 120 if (!origin_upper_left) { 121 offset += (float) (fb_height - 1); 122 y = tsrc_negate(y); 123 } 124 125 tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset)); 126 } 127 128 /* Z and W */ 129 tc_MOV(tc, real_dst[2], src_z); 130 tc_INV(tc, real_dst[3], src_w); 131 } 132 133 static void 134 fetch_face(struct fs_compile_context *fcc, struct toy_dst dst) 135 { 136 struct toy_compiler *tc = &fcc->tc; 137 const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0)); 138 struct toy_dst tmp_f, tmp; 139 struct toy_dst real_dst[4]; 140 141 tdst_transpose(dst, real_dst); 142 143 tmp_f = tc_alloc_tmp(tc); 144 tmp = tdst_d(tmp_f); 145 tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15)); 146 tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1)); 147 tc_MOV(tc, tmp_f, 
tsrc_from(tmp)); 148 149 /* convert to 1.0 and -1.0 */ 150 tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f)); 151 tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f)); 152 153 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f)); 154 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f)); 155 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f)); 156 } 157 158 static void 159 fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot) 160 { 161 struct toy_compiler *tc = &fcc->tc; 162 struct toy_dst real_dst[4]; 163 bool is_const = false; 164 int grf, interp, ch; 165 166 tdst_transpose(dst, real_dst); 167 168 grf = fcc->first_attr_grf + slot * 2; 169 170 switch (fcc->tgsi.inputs[slot].interp) { 171 case TGSI_INTERPOLATE_CONSTANT: 172 is_const = true; 173 break; 174 case TGSI_INTERPOLATE_LINEAR: 175 if (fcc->tgsi.inputs[slot].centroid) 176 interp = fcc->payloads[0].interp_nonperspective_centroid; 177 else 178 interp = fcc->payloads[0].interp_nonperspective_pixel; 179 break; 180 case TGSI_INTERPOLATE_COLOR: 181 if (fcc->variant->u.fs.flatshade) { 182 is_const = true; 183 break; 184 } 185 /* fall through */ 186 case TGSI_INTERPOLATE_PERSPECTIVE: 187 if (fcc->tgsi.inputs[slot].centroid) 188 interp = fcc->payloads[0].interp_perspective_centroid; 189 else 190 interp = fcc->payloads[0].interp_perspective_pixel; 191 break; 192 default: 193 assert(!"unexpected FS interpolation"); 194 interp = fcc->payloads[0].interp_perspective_pixel; 195 break; 196 } 197 198 if (is_const) { 199 struct toy_src a0[4]; 200 201 a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4); 202 a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4); 203 a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4); 204 a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4); 205 206 for (ch = 0; ch < 4; ch++) 207 tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010)); 208 } 209 else { 210 struct toy_src attr[4], uv; 211 212 attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0); 213 attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4); 214 attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0); 215 
attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4); 216 217 uv = tsrc(TOY_FILE_GRF, interp, 0); 218 219 for (ch = 0; ch < 4; ch++) { 220 tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch], 221 tsrc_rect(attr[ch], TOY_RECT_010), uv); 222 } 223 } 224 225 if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) { 226 tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f)); 227 tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f)); 228 tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f)); 229 } 230 } 231 232 static void 233 fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc, 234 struct toy_dst dst, int dim, int idx) 235 { 236 int slot; 237 238 assert(!dim); 239 240 slot = toy_tgsi_find_input(&fcc->tgsi, idx); 241 if (slot < 0) 242 return; 243 244 switch (fcc->tgsi.inputs[slot].semantic_name) { 245 case TGSI_SEMANTIC_POSITION: 246 fetch_position(fcc, dst); 247 break; 248 case TGSI_SEMANTIC_FACE: 249 fetch_face(fcc, dst); 250 break; 251 default: 252 fetch_attr(fcc, dst, slot); 253 break; 254 } 255 } 256 257 static void 258 fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc, 259 struct toy_dst dst, int dim, 260 struct toy_src idx) 261 { 262 const struct toy_dst offset = 263 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); 264 struct toy_compiler *tc = &fcc->tc; 265 unsigned simd_mode, param_size; 266 struct toy_inst *inst; 267 struct toy_src desc, real_src[4]; 268 struct toy_dst tmp, real_dst[4]; 269 unsigned i; 270 271 tsrc_transpose(idx, real_src); 272 273 /* set offset */ 274 inst = tc_MOV(tc, offset, real_src[0]); 275 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 276 277 switch (inst->exec_size) { 278 case GEN6_EXECSIZE_8: 279 simd_mode = GEN6_MSG_SAMPLER_SIMD8; 280 param_size = 1; 281 break; 282 case GEN6_EXECSIZE_16: 283 simd_mode = GEN6_MSG_SAMPLER_SIMD16; 284 param_size = 2; 285 break; 286 default: 287 assert(!"unsupported execution size"); 288 tc_MOV(tc, dst, tsrc_imm_f(0.0f)); 289 return; 290 break; 291 } 292 293 desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false, 294 
simd_mode, 295 GEN6_MSG_SAMPLER_LD, 296 0, 297 fcc->shader->bt.const_base + dim); 298 299 tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0); 300 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); 301 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 302 303 tdst_transpose(dst, real_dst); 304 for (i = 0; i < 4; i++) { 305 const struct toy_src src = 306 tsrc_offset(tsrc_from(tmp), param_size * i, 0); 307 308 /* cast to type D to make sure these are raw moves */ 309 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); 310 } 311 } 312 313 static bool 314 fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc, 315 struct toy_dst dst, int dim, 316 struct toy_src idx) 317 { 318 const int grf = fcc->first_const_grf + idx.val32 / 2; 319 const int grf_subreg = (idx.val32 & 1) * 16; 320 struct toy_src src; 321 struct toy_dst real_dst[4]; 322 unsigned i; 323 324 if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || 325 grf >= fcc->first_attr_grf) 326 return false; 327 328 src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010); 329 330 tdst_transpose(dst, real_dst); 331 for (i = 0; i < 4; i++) { 332 /* cast to type D to make sure these are raw moves */ 333 tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i))); 334 } 335 336 return true; 337 } 338 339 static void 340 fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc, 341 struct toy_dst dst, int dim, struct toy_src idx) 342 { 343 const struct toy_dst header = 344 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); 345 const struct toy_dst global_offset = 346 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4)); 347 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); 348 struct toy_compiler *tc = &fcc->tc; 349 unsigned msg_type, msg_ctrl, msg_len; 350 struct toy_inst *inst; 351 struct toy_src desc; 352 struct toy_dst tmp, real_dst[4]; 353 unsigned i; 354 355 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) 356 return; 357 358 /* set 
message header */ 359 inst = tc_MOV(tc, header, r0); 360 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 361 362 /* set global offset */ 363 inst = tc_MOV(tc, global_offset, idx); 364 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 365 inst->exec_size = GEN6_EXECSIZE_1; 366 inst->src[0].rect = TOY_RECT_010; 367 368 msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ; 369 msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO; 370 msg_len = 1; 371 372 desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false, 373 msg_type, msg_ctrl, fcc->shader->bt.const_base + dim); 374 375 tmp = tc_alloc_tmp(tc); 376 377 tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache); 378 379 tdst_transpose(dst, real_dst); 380 for (i = 0; i < 4; i++) { 381 const struct toy_src src = 382 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i); 383 384 /* cast to type D to make sure these are raw moves */ 385 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); 386 } 387 } 388 389 static void 390 fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc, 391 struct toy_dst dst, int dim, struct toy_src idx) 392 { 393 struct toy_compiler *tc = &fcc->tc; 394 const struct toy_dst offset = 395 tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); 396 struct toy_src desc; 397 struct toy_inst *inst; 398 struct toy_dst tmp, real_dst[4]; 399 unsigned i; 400 401 if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) 402 return; 403 404 /* 405 * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was 406 * changed from OWord Block Read to ld to increase performance in the 407 * classic driver. Since we use the constant cache instead of the data 408 * cache, I wonder if we still want to follow the classic driver. 
409 */ 410 411 /* set offset */ 412 inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010)); 413 inst->exec_size = GEN6_EXECSIZE_8; 414 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 415 416 desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false, 417 GEN6_MSG_SAMPLER_SIMD4X2, 418 GEN6_MSG_SAMPLER_LD, 419 0, 420 fcc->shader->bt.const_base + dim); 421 422 tmp = tc_alloc_tmp(tc); 423 inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); 424 inst->exec_size = GEN6_EXECSIZE_8; 425 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 426 427 tdst_transpose(dst, real_dst); 428 for (i = 0; i < 4; i++) { 429 const struct toy_src src = 430 tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i); 431 432 /* cast to type D to make sure these are raw moves */ 433 tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); 434 } 435 } 436 437 static void 438 fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc, 439 struct toy_dst dst, int idx) 440 { 441 const uint32_t *imm; 442 struct toy_dst real_dst[4]; 443 int ch; 444 445 imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL); 446 447 tdst_transpose(dst, real_dst); 448 /* raw moves */ 449 for (ch = 0; ch < 4; ch++) 450 tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch])); 451 } 452 453 static void 454 fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc, 455 struct toy_dst dst, int dim, int idx) 456 { 457 struct toy_compiler *tc = &fcc->tc; 458 const struct toy_tgsi *tgsi = &fcc->tgsi; 459 int slot; 460 461 assert(!dim); 462 463 slot = toy_tgsi_find_system_value(tgsi, idx); 464 if (slot < 0) 465 return; 466 467 switch (tgsi->system_values[slot].semantic_name) { 468 case TGSI_SEMANTIC_PRIMID: 469 case TGSI_SEMANTIC_INSTANCEID: 470 case TGSI_SEMANTIC_VERTEXID: 471 default: 472 tc_fail(tc, "unhandled system value"); 473 tc_MOV(tc, dst, tsrc_imm_d(0)); 474 break; 475 } 476 } 477 478 static void 479 fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc, 480 struct toy_inst *inst) 481 { 482 struct toy_compiler *tc = &fcc->tc; 483 int dim, idx; 
484 485 assert(inst->src[0].file == TOY_FILE_IMM); 486 dim = inst->src[0].val32; 487 488 assert(inst->src[1].file == TOY_FILE_IMM); 489 idx = inst->src[1].val32; 490 491 switch (inst->opcode) { 492 case TOY_OPCODE_TGSI_IN: 493 fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx); 494 break; 495 case TOY_OPCODE_TGSI_CONST: 496 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) 497 fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]); 498 else 499 fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]); 500 break; 501 case TOY_OPCODE_TGSI_SV: 502 fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx); 503 break; 504 case TOY_OPCODE_TGSI_IMM: 505 assert(!dim); 506 fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx); 507 break; 508 default: 509 tc_fail(tc, "unhandled TGSI fetch"); 510 break; 511 } 512 513 tc_discard_inst(tc, inst); 514 } 515 516 static void 517 fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc, 518 struct toy_inst *inst) 519 { 520 struct toy_compiler *tc = &fcc->tc; 521 enum tgsi_file_type file; 522 int dim, idx; 523 struct toy_src indirect_dim, indirect_idx; 524 525 assert(inst->src[0].file == TOY_FILE_IMM); 526 file = inst->src[0].val32; 527 528 assert(inst->src[1].file == TOY_FILE_IMM); 529 dim = inst->src[1].val32; 530 indirect_dim = inst->src[2]; 531 532 assert(inst->src[3].file == TOY_FILE_IMM); 533 idx = inst->src[3].val32; 534 indirect_idx = inst->src[4]; 535 536 /* no dimension indirection */ 537 assert(indirect_dim.file == TOY_FILE_IMM); 538 dim += indirect_dim.val32; 539 540 switch (inst->opcode) { 541 case TOY_OPCODE_TGSI_INDIRECT_FETCH: 542 if (file == TGSI_FILE_CONSTANT) { 543 if (idx) { 544 struct toy_dst tmp = tc_alloc_tmp(tc); 545 546 tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx)); 547 indirect_idx = tsrc_from(tmp); 548 } 549 550 fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx); 551 break; 552 } 553 /* fall through */ 554 case TOY_OPCODE_TGSI_INDIRECT_STORE: 555 default: 556 tc_fail(tc, "unhandled TGSI 
indirection"); 557 break; 558 } 559 560 tc_discard_inst(tc, inst); 561 } 562 563 /** 564 * Emit instructions to move sampling parameters to the message registers. 565 */ 566 static int 567 fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type, 568 int base_mrf, int param_size, 569 struct toy_src *coords, int num_coords, 570 struct toy_src bias_or_lod, struct toy_src ref_or_si, 571 struct toy_src *ddx, struct toy_src *ddy, 572 int num_derivs) 573 { 574 int num_params, i; 575 576 assert(num_coords <= 4); 577 assert(num_derivs <= 3 && num_derivs <= num_coords); 578 579 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0)) 580 switch (msg_type) { 581 case GEN6_MSG_SAMPLER_SAMPLE: 582 for (i = 0; i < num_coords; i++) 583 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 584 num_params = num_coords; 585 break; 586 case GEN6_MSG_SAMPLER_SAMPLE_B: 587 case GEN6_MSG_SAMPLER_SAMPLE_L: 588 for (i = 0; i < num_coords; i++) 589 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 590 tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod); 591 num_params = 5; 592 break; 593 case GEN6_MSG_SAMPLER_SAMPLE_C: 594 for (i = 0; i < num_coords; i++) 595 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 596 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si); 597 num_params = 5; 598 break; 599 case GEN6_MSG_SAMPLER_SAMPLE_D: 600 for (i = 0; i < num_coords; i++) 601 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 602 for (i = 0; i < num_derivs; i++) { 603 tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]); 604 tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]); 605 } 606 num_params = 4 + num_derivs * 2; 607 break; 608 case GEN6_MSG_SAMPLER_SAMPLE_B_C: 609 case GEN6_MSG_SAMPLER_SAMPLE_L_C: 610 for (i = 0; i < num_coords; i++) 611 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 612 tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si); 613 tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod); 614 num_params = 6; 615 break; 616 case GEN6_MSG_SAMPLER_LD: 617 assert(num_coords <= 3); 618 619 for (i = 0; i < num_coords; i++) 620 tc_MOV(tc, 
tdst_d(SAMPLER_PARAM(i)), coords[i]); 621 tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod); 622 tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si); 623 num_params = 5; 624 break; 625 case GEN6_MSG_SAMPLER_RESINFO: 626 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod); 627 num_params = 1; 628 break; 629 default: 630 tc_fail(tc, "unknown sampler opcode"); 631 num_params = 0; 632 break; 633 } 634 #undef SAMPLER_PARAM 635 636 return num_params * param_size; 637 } 638 639 static int 640 fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type, 641 int base_mrf, int param_size, 642 struct toy_src *coords, int num_coords, 643 struct toy_src bias_or_lod, struct toy_src ref_or_si, 644 struct toy_src *ddx, struct toy_src *ddy, 645 int num_derivs) 646 { 647 int num_params, i; 648 649 assert(num_coords <= 4); 650 assert(num_derivs <= 3 && num_derivs <= num_coords); 651 652 #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0)) 653 switch (msg_type) { 654 case GEN6_MSG_SAMPLER_SAMPLE: 655 for (i = 0; i < num_coords; i++) 656 tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); 657 num_params = num_coords; 658 break; 659 case GEN6_MSG_SAMPLER_SAMPLE_B: 660 case GEN6_MSG_SAMPLER_SAMPLE_L: 661 tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod); 662 for (i = 0; i < num_coords; i++) 663 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); 664 num_params = 1 + num_coords; 665 break; 666 case GEN6_MSG_SAMPLER_SAMPLE_C: 667 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); 668 for (i = 0; i < num_coords; i++) 669 tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); 670 num_params = 1 + num_coords; 671 break; 672 case GEN6_MSG_SAMPLER_SAMPLE_D: 673 for (i = 0; i < num_coords; i++) { 674 tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]); 675 if (i < num_derivs) { 676 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]); 677 tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]); 678 } 679 } 680 num_params = num_coords * 3 - ((num_coords > num_derivs) ? 
2 : 0); 681 break; 682 case GEN6_MSG_SAMPLER_SAMPLE_B_C: 683 case GEN6_MSG_SAMPLER_SAMPLE_L_C: 684 tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); 685 tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod); 686 for (i = 0; i < num_coords; i++) 687 tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]); 688 num_params = 2 + num_coords; 689 break; 690 case GEN6_MSG_SAMPLER_LD: 691 assert(num_coords >= 1 && num_coords <= 3); 692 693 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]); 694 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod); 695 for (i = 1; i < num_coords; i++) 696 tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]); 697 num_params = 1 + num_coords; 698 break; 699 case GEN6_MSG_SAMPLER_RESINFO: 700 tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod); 701 num_params = 1; 702 break; 703 default: 704 tc_fail(tc, "unknown sampler opcode"); 705 num_params = 0; 706 break; 707 } 708 #undef SAMPLER_PARAM 709 710 return num_params * param_size; 711 } 712 713 /** 714 * Set up message registers and return the message descriptor for sampling. 
715 */ 716 static struct toy_src 717 fs_prepare_tgsi_sampling(struct fs_compile_context *fcc, 718 const struct toy_inst *inst, 719 int base_mrf, const uint32_t *saturate_coords, 720 unsigned *ret_sampler_index) 721 { 722 struct toy_compiler *tc = &fcc->tc; 723 unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; 724 struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si; 725 int num_coords, ref_pos, num_derivs; 726 int sampler_src, param_size, i; 727 728 switch (inst->exec_size) { 729 case GEN6_EXECSIZE_8: 730 simd_mode = GEN6_MSG_SAMPLER_SIMD8; 731 param_size = 1; 732 break; 733 case GEN6_EXECSIZE_16: 734 simd_mode = GEN6_MSG_SAMPLER_SIMD16; 735 param_size = 2; 736 break; 737 default: 738 tc_fail(tc, "unsupported execute size for sampling"); 739 return tsrc_null(); 740 break; 741 } 742 743 num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target); 744 ref_pos = tgsi_util_get_shadow_ref_src_index(inst->tex.target); 745 746 tsrc_transpose(inst->src[0], coords); 747 bias_or_lod = tsrc_null(); 748 ref_or_si = tsrc_null(); 749 num_derivs = 0; 750 sampler_src = 1; 751 752 /* 753 * For TXD, 754 * 755 * src0 := (x, y, z, w) 756 * src1 := ddx 757 * src2 := ddy 758 * src3 := sampler 759 * 760 * For TEX2, TXB2, and TXL2, 761 * 762 * src0 := (x, y, z, w) 763 * src1 := (v or bias or lod, ...) 764 * src2 := sampler 765 * 766 * For TEX, TXB, TXL, and TXP, 767 * 768 * src0 := (x, y, z, w or bias or lod or projection) 769 * src1 := sampler 770 * 771 * For TXQ, 772 * 773 * src0 := (lod, ...) 774 * src1 := sampler 775 * 776 * For TXQ_LZ, 777 * 778 * src0 := sampler 779 * 780 * And for TXF, 781 * 782 * src0 := (x, y, z, w or lod) 783 * src1 := sampler 784 * 785 * State trackers should not generate opcode+texture combinations with 786 * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY). 
787 */ 788 switch (inst->opcode) { 789 case TOY_OPCODE_TGSI_TEX: 790 if (ref_pos >= 0) { 791 assert(ref_pos < 4); 792 793 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; 794 ref_or_si = coords[ref_pos]; 795 } 796 else { 797 msg_type = GEN6_MSG_SAMPLER_SAMPLE; 798 } 799 break; 800 case TOY_OPCODE_TGSI_TXD: 801 if (ref_pos >= 0) { 802 assert(ref_pos < 4); 803 804 msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C; 805 ref_or_si = coords[ref_pos]; 806 807 if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5)) 808 tc_fail(tc, "TXD with shadow sampler not supported"); 809 } 810 else { 811 msg_type = GEN6_MSG_SAMPLER_SAMPLE_D; 812 } 813 814 tsrc_transpose(inst->src[1], ddx); 815 tsrc_transpose(inst->src[2], ddy); 816 num_derivs = num_coords; 817 sampler_src = 3; 818 break; 819 case TOY_OPCODE_TGSI_TXP: 820 if (ref_pos >= 0) { 821 assert(ref_pos < 3); 822 823 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; 824 ref_or_si = coords[ref_pos]; 825 } 826 else { 827 msg_type = GEN6_MSG_SAMPLER_SAMPLE; 828 } 829 830 /* project the coordinates */ 831 { 832 struct toy_dst tmp[4]; 833 834 tc_alloc_tmp4(tc, tmp); 835 836 tc_INV(tc, tmp[3], coords[3]); 837 for (i = 0; i < num_coords && i < 3; i++) { 838 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); 839 coords[i] = tsrc_from(tmp[i]); 840 } 841 842 if (ref_pos >= i) { 843 tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3])); 844 ref_or_si = tsrc_from(tmp[ref_pos]); 845 } 846 } 847 break; 848 case TOY_OPCODE_TGSI_TXB: 849 if (ref_pos >= 0) { 850 assert(ref_pos < 3); 851 852 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C; 853 ref_or_si = coords[ref_pos]; 854 } 855 else { 856 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B; 857 } 858 859 bias_or_lod = coords[3]; 860 break; 861 case TOY_OPCODE_TGSI_TXL: 862 if (ref_pos >= 0) { 863 assert(ref_pos < 3); 864 865 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; 866 ref_or_si = coords[ref_pos]; 867 } 868 else { 869 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; 870 } 871 872 bias_or_lod = coords[3]; 873 break; 874 case TOY_OPCODE_TGSI_TXF: 875 msg_type = 
GEN6_MSG_SAMPLER_LD; 876 877 switch (inst->tex.target) { 878 case TGSI_TEXTURE_2D_MSAA: 879 case TGSI_TEXTURE_2D_ARRAY_MSAA: 880 assert(ref_pos >= 0 && ref_pos < 4); 881 /* lod is always 0 */ 882 bias_or_lod = tsrc_imm_d(0); 883 ref_or_si = coords[ref_pos]; 884 break; 885 default: 886 bias_or_lod = coords[3]; 887 break; 888 } 889 890 /* offset the coordinates */ 891 if (!tsrc_is_null(inst->tex.offsets[0])) { 892 struct toy_dst tmp[4]; 893 struct toy_src offsets[4]; 894 895 tc_alloc_tmp4(tc, tmp); 896 tsrc_transpose(inst->tex.offsets[0], offsets); 897 898 for (i = 0; i < num_coords; i++) { 899 tc_ADD(tc, tmp[i], coords[i], offsets[i]); 900 coords[i] = tsrc_from(tmp[i]); 901 } 902 } 903 904 sampler_src = 1; 905 break; 906 case TOY_OPCODE_TGSI_TXQ: 907 msg_type = GEN6_MSG_SAMPLER_RESINFO; 908 num_coords = 0; 909 bias_or_lod = coords[0]; 910 break; 911 case TOY_OPCODE_TGSI_TXQ_LZ: 912 msg_type = GEN6_MSG_SAMPLER_RESINFO; 913 num_coords = 0; 914 sampler_src = 0; 915 break; 916 case TOY_OPCODE_TGSI_TEX2: 917 if (ref_pos >= 0) { 918 assert(ref_pos < 5); 919 920 msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; 921 922 if (ref_pos >= 4) { 923 struct toy_src src1[4]; 924 tsrc_transpose(inst->src[1], src1); 925 ref_or_si = src1[ref_pos - 4]; 926 } 927 else { 928 ref_or_si = coords[ref_pos]; 929 } 930 } 931 else { 932 msg_type = GEN6_MSG_SAMPLER_SAMPLE; 933 } 934 935 sampler_src = 2; 936 break; 937 case TOY_OPCODE_TGSI_TXB2: 938 if (ref_pos >= 0) { 939 assert(ref_pos < 4); 940 941 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C; 942 ref_or_si = coords[ref_pos]; 943 } 944 else { 945 msg_type = GEN6_MSG_SAMPLER_SAMPLE_B; 946 } 947 948 { 949 struct toy_src src1[4]; 950 tsrc_transpose(inst->src[1], src1); 951 bias_or_lod = src1[0]; 952 } 953 954 sampler_src = 2; 955 break; 956 case TOY_OPCODE_TGSI_TXL2: 957 if (ref_pos >= 0) { 958 assert(ref_pos < 4); 959 960 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; 961 ref_or_si = coords[ref_pos]; 962 } 963 else { 964 msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; 965 } 966 
967 { 968 struct toy_src src1[4]; 969 tsrc_transpose(inst->src[1], src1); 970 bias_or_lod = src1[0]; 971 } 972 973 sampler_src = 2; 974 break; 975 default: 976 assert(!"unhandled sampling opcode"); 977 return tsrc_null(); 978 break; 979 } 980 981 assert(inst->src[sampler_src].file == TOY_FILE_IMM); 982 sampler_index = inst->src[sampler_src].val32; 983 binding_table_index = fcc->shader->bt.tex_base + sampler_index; 984 985 /* 986 * From the Sandy Bridge PRM, volume 4 part 1, page 18: 987 * 988 * "Note that the (cube map) coordinates delivered to the sampling 989 * engine must already have been divided by the component with the 990 * largest absolute value." 991 */ 992 switch (inst->tex.target) { 993 case TGSI_TEXTURE_CUBE: 994 case TGSI_TEXTURE_SHADOWCUBE: 995 case TGSI_TEXTURE_CUBE_ARRAY: 996 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 997 /* TXQ does not need coordinates */ 998 if (num_coords >= 3) { 999 struct toy_dst tmp[4]; 1000 1001 tc_alloc_tmp4(tc, tmp); 1002 1003 tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]), 1004 tsrc_absolute(coords[1]), GEN6_COND_GE); 1005 tc_SEL(tc, tmp[3], tsrc_from(tmp[3]), 1006 tsrc_absolute(coords[2]), GEN6_COND_GE); 1007 tc_INV(tc, tmp[3], tsrc_from(tmp[3])); 1008 1009 for (i = 0; i < 3; i++) { 1010 tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); 1011 coords[i] = tsrc_from(tmp[i]); 1012 } 1013 } 1014 break; 1015 } 1016 1017 /* 1018 * Saturate (s, t, r). saturate_coords is set for sampler and coordinate 1019 * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is 1020 * so that sampling outside the border gets the correct colors. 
1021 */ 1022 for (i = 0; i < MIN2(num_coords, 3); i++) { 1023 bool is_rect; 1024 1025 if (!(saturate_coords[i] & (1 << sampler_index))) 1026 continue; 1027 1028 switch (inst->tex.target) { 1029 case TGSI_TEXTURE_RECT: 1030 case TGSI_TEXTURE_SHADOWRECT: 1031 is_rect = true; 1032 break; 1033 default: 1034 is_rect = false; 1035 break; 1036 } 1037 1038 if (is_rect) { 1039 struct toy_src min, max; 1040 struct toy_dst tmp; 1041 1042 tc_fail(tc, "GL_CLAMP with rectangle texture unsupported"); 1043 tmp = tc_alloc_tmp(tc); 1044 1045 /* saturate to [0, width] or [0, height] */ 1046 /* TODO TXQ? */ 1047 min = tsrc_imm_f(0.0f); 1048 max = tsrc_imm_f(2048.0f); 1049 1050 tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G); 1051 tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L); 1052 1053 coords[i] = tsrc_from(tmp); 1054 } 1055 else { 1056 struct toy_dst tmp; 1057 struct toy_inst *inst2; 1058 1059 tmp = tc_alloc_tmp(tc); 1060 1061 /* saturate to [0.0f, 1.0f] */ 1062 inst2 = tc_MOV(tc, tmp, coords[i]); 1063 inst2->saturate = true; 1064 1065 coords[i] = tsrc_from(tmp); 1066 } 1067 } 1068 1069 /* set up sampler parameters */ 1070 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 1071 msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size, 1072 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); 1073 } 1074 else { 1075 msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size, 1076 coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); 1077 } 1078 1079 /* 1080 * From the Sandy Bridge PRM, volume 4 part 1, page 136: 1081 * 1082 * "The maximum message length allowed to the sampler is 11. This would 1083 * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of 1084 * SIMD16." 
    */
   if (msg_len > 11)
      tc_fail(tc, "maximum length for messages to the sampler is 11");

   /* report the sampler index back to callers that asked for it */
   if (ret_sampler_index)
      *ret_sampler_index = sampler_index;

   return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
         false, simd_mode, msg_type, sampler_index, binding_table_index);
}

/**
 * Lower a TGSI sampling instruction to a SEND to the sampler, followed by
 * MOVs that apply the sampler view swizzles to the result.
 *
 * The message descriptor comes from fs_prepare_tgsi_sampling(), which is
 * given fcc->first_free_mrf as the base MRF.  The SEND is redirected to
 * freshly allocated temporaries; the channels of the original destination
 * are written by the swizzling MOVs emitted afterwards.
 */
static void
fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
                              struct toy_inst *inst)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_dst dst[4], tmp[4];
   struct toy_src desc;
   unsigned sampler_index;
   int swizzles[4], i;
   bool need_filter;

   desc = fs_prepare_tgsi_sampling(fcc, inst,
         fcc->first_free_mrf,
         fcc->variant->saturate_tex_coords,
         &sampler_index);

   /*
    * TXF, TXQ and TXQ_LZ get the identity swizzle below; every other opcode
    * uses the swizzles of the sampler view.
    */
   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_TXF:
   case TOY_OPCODE_TGSI_TXQ:
   case TOY_OPCODE_TGSI_TXQ_LZ:
      need_filter = false;
      break;
   default:
      need_filter = true;
      break;
   }

   /* src[0] is the message payload and src[1] the descriptor; clear the rest */
   toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
   inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
   inst->src[1] = desc;
   for (i = 2; i < ARRAY_SIZE(inst->src); i++)
      inst->src[i] = tsrc_null();

   /* write to temps first */
   tc_alloc_tmp4(tc, tmp);
   for (i = 0; i < 4; i++)
      tmp[i].type = inst->dst.type;
   /* keep the original destination (split into dst[]) for the MOVs below */
   tdst_transpose(inst->dst, dst);
   inst->dst = tmp[0];

   /*
    * NOTE(review): presumably re-inserts the SEND at the current emission
    * point so that it follows the parameter set-up -- confirm against
    * tc_move_inst().
    */
   tc_move_inst(tc, inst);

   if (need_filter) {
      assert(sampler_index < fcc->variant->num_sampler_views);
      swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
      swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
      swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
      swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
   }
   else {
      swizzles[0] = PIPE_SWIZZLE_X;
      swizzles[1] = PIPE_SWIZZLE_Y;
      swizzles[2] = PIPE_SWIZZLE_Z;
      swizzles[3] = PIPE_SWIZZLE_W;
   }

   /* swizzle the results */
   for (i = 0; i < 4; i++) {
      switch (swizzles[i]) {
      case PIPE_SWIZZLE_0:
         tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
         break;
      case PIPE_SWIZZLE_1:
         tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
         break;
      default:
         /* any other value is used directly as an index into tmp[] */
         tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
         break;
      }
   }
}

/**
 * Lower TOY_OPCODE_DDX (and, in the other branch, DDY) to one ADD per
 * channel that subtracts neighbouring fragments of a subspan, then discard
 * the virtual instruction.
 */
static void
fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst dst[4];
   struct toy_src src[4];
   unsigned i;

   tdst_transpose(inst->dst, dst);
   tsrc_transpose(inst->src[0], src);

   /*
    * Every four fragments are from a 2x2 subspan, with
    *
    *   fragment 1 on the top-left,
    *   fragment 2 on the top-right,
    *   fragment 3 on the bottom-left,
    *   fragment 4 on the bottom-right.
    *
    * DDX should thus produce
    *
    *   dst = src.yyww - src.xxzz
    *
    * and DDY should produce
    *
    *   dst = src.zzww - src.xxyy
    *
    * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to
    * play with the region parameters.
    */
   if (inst->opcode == TOY_OPCODE_DDX) {
      for (i = 0; i < 4; i++) {
         struct toy_src left, right;

         /* right is the same region as left, shifted by one element */
         left = tsrc_rect(src[i], TOY_RECT_220);
         right = tsrc_offset(left, 0, 1);

         /* dst = right - left */
         tc_ADD(tc, dst[i], right, tsrc_negate(left));
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         struct toy_src top, bottom;

         /* approximate with dst = src.zzzz - src.xxxx */
         top = tsrc_rect(src[i], TOY_RECT_440);
         bottom = tsrc_offset(top, 0, 2);

         /* dst = bottom - top */
         tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
      }
   }

   tc_discard_inst(tc, inst);
}

/**
 * Lower TOY_OPCODE_FB_WRITE to a SEND to the render cache data port
 * (GEN6_SFID_DP_RC).
 */
static void
fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
{
   /* fs_write_fb() has set up the message registers */
   toy_compiler_lower_to_send(tc, inst, true,
         GEN6_SFID_DP_RC);
}

/**
 * Lower TOY_OPCODE_KIL by folding the flag register f0 into the pixel mask
 * kept in the lower half of r1.7, then discard the virtual instruction.
 *
 * A null src[0] is handled as an unconditional kill; otherwise each channel
 * of src[0] is compared against 0.0f and the mask is updated after every
 * comparison.
 */
static void
fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
{
   struct toy_dst pixel_mask_dst;
   struct toy_src f0, pixel_mask;
   struct toy_inst *tmp;

   /* lower half of r1.7:ud */
   pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
   pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);

   f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010);

   /* KILL or KILL_IF */
   if (tsrc_is_null(inst->src[0])) {
      struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
      struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0));

      /* create a mask that masks out all pixels */
      tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
      tmp->exec_size = GEN6_EXECSIZE_1;
      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;

      /*
       * A register never differs from itself; presumably this clears the
       * flag bits of the channels that execute the KILL -- TODO confirm.
       */
      tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ);

      /* swapping the two src operands breaks glBitmap()!? */
      tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
      tmp->exec_size = GEN6_EXECSIZE_1;
      tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
   }
   else {
      struct toy_src src[4];
      unsigned i;

      tsrc_transpose(inst->src[0], src);
      /* mask out killed pixels */
      for (i = 0; i < 4; i++) {
         /* the comparison result (src >= 0.0f) presumably lands in f0 */
         tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
               GEN6_COND_GE);

         /* swapping the two src operands breaks glBitmap()!? */
         tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
         tmp->exec_size = GEN6_EXECSIZE_1;
         tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK;
      }
   }

   tc_discard_inst(tc, inst);
}

/**
 * Lower all virtual opcodes in the instruction list.
 *
 * Two passes are made over the list: the first lowers the TGSI opcodes, the
 * second lowers the remaining virtual opcodes (math, derivatives, FB writes
 * and KIL) and fails the compile for any opcode above 127 that is left.
 */
static void
fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
{
   struct toy_compiler *tc = &fcc->tc;
   struct toy_inst *inst;

   /* lower TGSI's first, as they might be lowered to other virtual opcodes */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         fs_lower_opcode_tgsi_direct(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         fs_lower_opcode_tgsi_indirect(fcc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         fs_lower_opcode_tgsi_sampling(fcc, inst);
         break;
      }
   }

   /* second pass: everything that is not a TGSI opcode */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_DDX:
      case TOY_OPCODE_DDY:
         fs_lower_opcode_derivative(tc, inst);
         break;
      case TOY_OPCODE_FB_WRITE:
         fs_lower_opcode_fb_write(tc, inst);
         break;
      case TOY_OPCODE_KIL:
         fs_lower_opcode_kil(tc, inst);
         break;
      default:
         /* opcodes above 127 are treated as virtual ones nobody lowered */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}

/**
 * Compile the shader.
1364 */ 1365 static bool 1366 fs_compile(struct fs_compile_context *fcc) 1367 { 1368 struct toy_compiler *tc = &fcc->tc; 1369 struct ilo_shader *sh = fcc->shader; 1370 1371 fs_lower_virtual_opcodes(fcc); 1372 toy_compiler_legalize_for_ra(tc); 1373 toy_compiler_optimize(tc); 1374 toy_compiler_allocate_registers(tc, 1375 fcc->first_free_grf, 1376 fcc->last_free_grf, 1377 fcc->num_grf_per_vrf); 1378 toy_compiler_legalize_for_asm(tc); 1379 1380 if (tc->fail) { 1381 ilo_err("failed to legalize FS instructions: %s\n", tc->reason); 1382 return false; 1383 } 1384 1385 if (ilo_debug & ILO_DEBUG_FS) { 1386 ilo_printf("legalized instructions:\n"); 1387 toy_compiler_dump(tc); 1388 ilo_printf("\n"); 1389 } 1390 1391 if (true) { 1392 sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); 1393 } 1394 else { 1395 static const uint32_t microcode[] = { 1396 /* fill in the microcode here */ 1397 0x0, 0x0, 0x0, 0x0, 1398 }; 1399 const bool swap = true; 1400 1401 sh->kernel_size = sizeof(microcode); 1402 sh->kernel = MALLOC(sh->kernel_size); 1403 1404 if (sh->kernel) { 1405 const int num_dwords = sizeof(microcode) / 4; 1406 const uint32_t *src = microcode; 1407 uint32_t *dst = (uint32_t *) sh->kernel; 1408 int i; 1409 1410 for (i = 0; i < num_dwords; i += 4) { 1411 if (swap) { 1412 dst[i + 0] = src[i + 3]; 1413 dst[i + 1] = src[i + 2]; 1414 dst[i + 2] = src[i + 1]; 1415 dst[i + 3] = src[i + 0]; 1416 } 1417 else { 1418 memcpy(dst, src, 16); 1419 } 1420 } 1421 } 1422 } 1423 1424 if (!sh->kernel) { 1425 ilo_err("failed to compile FS: %s\n", tc->reason); 1426 return false; 1427 } 1428 1429 if (ilo_debug & ILO_DEBUG_FS) { 1430 ilo_printf("disassembly:\n"); 1431 toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false); 1432 ilo_printf("\n"); 1433 } 1434 1435 return true; 1436 } 1437 1438 /** 1439 * Emit instructions to write the color buffers (and the depth buffer). 
1440 */ 1441 static void 1442 fs_write_fb(struct fs_compile_context *fcc) 1443 { 1444 struct toy_compiler *tc = &fcc->tc; 1445 int base_mrf = fcc->first_free_mrf; 1446 const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0)); 1447 bool header_present = false; 1448 struct toy_src desc; 1449 unsigned msg_type, ctrl; 1450 int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs; 1451 int pos_slot = -1, cbuf, i; 1452 1453 for (i = 0; i < ARRAY_SIZE(color_slots); i++) 1454 color_slots[i] = -1; 1455 1456 for (i = 0; i < fcc->tgsi.num_outputs; i++) { 1457 if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) { 1458 assert(fcc->tgsi.outputs[i].semantic_index < ARRAY_SIZE(color_slots)); 1459 color_slots[fcc->tgsi.outputs[i].semantic_index] = i; 1460 } 1461 else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) { 1462 pos_slot = i; 1463 } 1464 } 1465 1466 num_cbufs = fcc->variant->u.fs.num_cbufs; 1467 /* still need to send EOT (and probably depth) */ 1468 if (!num_cbufs) 1469 num_cbufs = 1; 1470 1471 /* we need the header to specify the pixel mask or render target */ 1472 if (fcc->tgsi.uses_kill || num_cbufs > 1) { 1473 const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); 1474 struct toy_inst *inst; 1475 1476 inst = tc_MOV(tc, header, r0); 1477 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 1478 base_mrf += fcc->num_grf_per_vrf; 1479 1480 /* this is a two-register header */ 1481 if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) { 1482 inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0)); 1483 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 1484 base_mrf += fcc->num_grf_per_vrf; 1485 } 1486 1487 header_present = true; 1488 } 1489 1490 for (cbuf = 0; cbuf < num_cbufs; cbuf++) { 1491 const int slot = 1492 color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 
0 : cbuf]; 1493 int mrf = base_mrf, vrf; 1494 struct toy_src src[4]; 1495 1496 if (slot >= 0) { 1497 const unsigned undefined_mask = 1498 fcc->tgsi.outputs[slot].undefined_mask; 1499 const int index = fcc->tgsi.outputs[slot].index; 1500 1501 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index); 1502 if (vrf >= 0) { 1503 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0); 1504 tsrc_transpose(tmp, src); 1505 } 1506 else { 1507 /* use (0, 0, 0, 0) */ 1508 tsrc_transpose(tsrc_imm_f(0.0f), src); 1509 } 1510 1511 for (i = 0; i < 4; i++) { 1512 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); 1513 1514 if (undefined_mask & (1 << i)) 1515 src[i] = tsrc_imm_f(0.0f); 1516 1517 tc_MOV(tc, dst, src[i]); 1518 1519 mrf += fcc->num_grf_per_vrf; 1520 } 1521 } 1522 else { 1523 /* use (0, 0, 0, 0) */ 1524 for (i = 0; i < 4; i++) { 1525 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); 1526 1527 tc_MOV(tc, dst, tsrc_imm_f(0.0f)); 1528 mrf += fcc->num_grf_per_vrf; 1529 } 1530 } 1531 1532 /* select BLEND_STATE[rt] */ 1533 if (cbuf > 0) { 1534 struct toy_inst *inst; 1535 1536 inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf)); 1537 inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; 1538 inst->exec_size = GEN6_EXECSIZE_1; 1539 inst->src[0].rect = TOY_RECT_010; 1540 } 1541 1542 if (cbuf == 0 && pos_slot >= 0) { 1543 const int index = fcc->tgsi.outputs[pos_slot].index; 1544 const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); 1545 struct toy_src src[4]; 1546 int vrf; 1547 1548 vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index); 1549 if (vrf >= 0) { 1550 const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0); 1551 tsrc_transpose(tmp, src); 1552 } 1553 else { 1554 /* use (0, 0, 0, 0) */ 1555 tsrc_transpose(tsrc_imm_f(0.0f), src); 1556 } 1557 1558 /* only Z */ 1559 tc_MOV(tc, dst, src[2]); 1560 1561 mrf += fcc->num_grf_per_vrf; 1562 } 1563 1564 msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 
1565 GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 : 1566 GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8; 1567 1568 ctrl = (cbuf == num_cbufs - 1) << 12 | 1569 msg_type << 8; 1570 1571 desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1, 1572 mrf - fcc->first_free_mrf, 0, 1573 header_present, false, 1574 GEN6_MSG_DP_RT_WRITE, 1575 ctrl, fcc->shader->bt.rt_base + cbuf); 1576 1577 tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(), 1578 tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc); 1579 } 1580 } 1581 1582 /** 1583 * Set up shader outputs for fixed-function units. 1584 */ 1585 static void 1586 fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi) 1587 { 1588 unsigned i; 1589 1590 sh->out.count = tgsi->num_outputs; 1591 for (i = 0; i < tgsi->num_outputs; i++) { 1592 sh->out.register_indices[i] = tgsi->outputs[i].index; 1593 sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name; 1594 sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index; 1595 1596 if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) 1597 sh->out.has_pos = true; 1598 } 1599 } 1600 1601 /** 1602 * Set up shader inputs for fixed-function units. 
1603 */ 1604 static void 1605 fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi, 1606 bool flatshade) 1607 { 1608 unsigned i; 1609 1610 sh->in.count = tgsi->num_inputs; 1611 for (i = 0; i < tgsi->num_inputs; i++) { 1612 sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name; 1613 sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index; 1614 sh->in.interp[i] = tgsi->inputs[i].interp; 1615 sh->in.centroid[i] = tgsi->inputs[i].centroid; 1616 1617 if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) { 1618 sh->in.has_pos = true; 1619 continue; 1620 } 1621 else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) { 1622 continue; 1623 } 1624 1625 switch (tgsi->inputs[i].interp) { 1626 case TGSI_INTERPOLATE_CONSTANT: 1627 sh->in.const_interp_enable |= 1 << i; 1628 break; 1629 case TGSI_INTERPOLATE_LINEAR: 1630 sh->in.has_linear_interp = true; 1631 1632 if (tgsi->inputs[i].centroid) { 1633 sh->in.barycentric_interpolation_mode |= 1634 GEN6_INTERP_NONPERSPECTIVE_CENTROID; 1635 } 1636 else { 1637 sh->in.barycentric_interpolation_mode |= 1638 GEN6_INTERP_NONPERSPECTIVE_PIXEL; 1639 } 1640 break; 1641 case TGSI_INTERPOLATE_COLOR: 1642 if (flatshade) { 1643 sh->in.const_interp_enable |= 1 << i; 1644 break; 1645 } 1646 /* fall through */ 1647 case TGSI_INTERPOLATE_PERSPECTIVE: 1648 if (tgsi->inputs[i].centroid) { 1649 sh->in.barycentric_interpolation_mode |= 1650 GEN6_INTERP_PERSPECTIVE_CENTROID; 1651 } 1652 else { 1653 sh->in.barycentric_interpolation_mode |= 1654 GEN6_INTERP_PERSPECTIVE_PIXEL; 1655 } 1656 break; 1657 default: 1658 break; 1659 } 1660 } 1661 } 1662 1663 static int 1664 fs_setup_payloads(struct fs_compile_context *fcc) 1665 { 1666 const struct ilo_shader *sh = fcc->shader; 1667 int grf, i; 1668 1669 grf = 0; 1670 1671 /* r0: header */ 1672 grf++; 1673 1674 /* r1-r2: coordinates and etc. */ 1675 grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 
2 : 1; 1676 1677 for (i = 0; i < ARRAY_SIZE(fcc->payloads); i++) { 1678 const int reg_scale = 1679 (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2; 1680 1681 /* r3-r26 or r32-r55: barycentric interpolation parameters */ 1682 if (sh->in.barycentric_interpolation_mode & 1683 (GEN6_INTERP_PERSPECTIVE_PIXEL)) { 1684 fcc->payloads[i].interp_perspective_pixel = grf; 1685 grf += 2 * reg_scale; 1686 } 1687 if (sh->in.barycentric_interpolation_mode & 1688 (GEN6_INTERP_PERSPECTIVE_CENTROID)) { 1689 fcc->payloads[i].interp_perspective_centroid = grf; 1690 grf += 2 * reg_scale; 1691 } 1692 if (sh->in.barycentric_interpolation_mode & 1693 (GEN6_INTERP_PERSPECTIVE_SAMPLE)) { 1694 fcc->payloads[i].interp_perspective_sample = grf; 1695 grf += 2 * reg_scale; 1696 } 1697 if (sh->in.barycentric_interpolation_mode & 1698 (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) { 1699 fcc->payloads[i].interp_nonperspective_pixel = grf; 1700 grf += 2 * reg_scale; 1701 } 1702 if (sh->in.barycentric_interpolation_mode & 1703 (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) { 1704 fcc->payloads[i].interp_nonperspective_centroid = grf; 1705 grf += 2 * reg_scale; 1706 } 1707 if (sh->in.barycentric_interpolation_mode & 1708 (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) { 1709 fcc->payloads[i].interp_nonperspective_sample = grf; 1710 grf += 2 * reg_scale; 1711 } 1712 1713 /* r27-r28 or r56-r57: interpoloated depth */ 1714 if (sh->in.has_pos) { 1715 fcc->payloads[i].source_depth = grf; 1716 grf += 1 * reg_scale; 1717 } 1718 1719 /* r29-r30 or r58-r59: interpoloated w */ 1720 if (sh->in.has_pos) { 1721 fcc->payloads[i].source_w = grf; 1722 grf += 1 * reg_scale; 1723 } 1724 1725 /* r31 or r60: position offset */ 1726 if (false) { 1727 fcc->payloads[i].pos_offset = grf; 1728 grf++; 1729 } 1730 1731 if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32) 1732 break; 1733 } 1734 1735 return grf; 1736 } 1737 1738 /** 1739 * Translate the TGSI tokens. 
1740 */ 1741 static bool 1742 fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, 1743 struct toy_tgsi *tgsi) 1744 { 1745 if (ilo_debug & ILO_DEBUG_FS) { 1746 ilo_printf("dumping fragment shader\n"); 1747 ilo_printf("\n"); 1748 1749 tgsi_dump(tokens, 0); 1750 ilo_printf("\n"); 1751 } 1752 1753 toy_compiler_translate_tgsi(tc, tokens, false, tgsi); 1754 if (tc->fail) { 1755 ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason); 1756 return false; 1757 } 1758 1759 if (ilo_debug & ILO_DEBUG_FS) { 1760 ilo_printf("TGSI translator:\n"); 1761 toy_tgsi_dump(tgsi); 1762 ilo_printf("\n"); 1763 toy_compiler_dump(tc); 1764 ilo_printf("\n"); 1765 } 1766 1767 return true; 1768 } 1769 1770 /** 1771 * Set up FS compile context. This includes translating the TGSI tokens. 1772 */ 1773 static bool 1774 fs_setup(struct fs_compile_context *fcc, 1775 const struct ilo_shader_state *state, 1776 const struct ilo_shader_variant *variant) 1777 { 1778 int num_consts; 1779 1780 memset(fcc, 0, sizeof(*fcc)); 1781 1782 fcc->shader = CALLOC_STRUCT(ilo_shader); 1783 if (!fcc->shader) 1784 return false; 1785 1786 fcc->variant = variant; 1787 1788 toy_compiler_init(&fcc->tc, state->info.dev); 1789 1790 fcc->dispatch_mode = GEN6_PS_DISPATCH_8; 1791 1792 fcc->tc.templ.access_mode = GEN6_ALIGN_1; 1793 if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) { 1794 fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H; 1795 fcc->tc.templ.exec_size = GEN6_EXECSIZE_16; 1796 } 1797 else { 1798 fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q; 1799 fcc->tc.templ.exec_size = GEN6_EXECSIZE_8; 1800 } 1801 1802 fcc->tc.rect_linear_width = 8; 1803 1804 /* 1805 * The classic driver uses the sampler cache (gen6) or the data cache 1806 * (gen7). Why? 
1807 */ 1808 fcc->const_cache = GEN6_SFID_DP_CC; 1809 1810 if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) { 1811 toy_compiler_cleanup(&fcc->tc); 1812 FREE(fcc->shader); 1813 return false; 1814 } 1815 1816 fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade); 1817 fs_setup_shader_out(fcc->shader, &fcc->tgsi); 1818 1819 if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) { 1820 num_consts = (fcc->tgsi.const_count + 1) / 2; 1821 1822 /* 1823 * From the Sandy Bridge PRM, volume 2 part 1, page 287: 1824 * 1825 * "The sum of all four read length fields (each incremented to 1826 * represent the actual read length) must be less than or equal to 1827 * 64" 1828 * 1829 * Since we are usually under a high register pressure, do not allow 1830 * for more than 8. 1831 */ 1832 if (num_consts > 8) 1833 num_consts = 0; 1834 } 1835 else { 1836 num_consts = 0; 1837 } 1838 1839 fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts); 1840 fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); 1841 1842 fcc->first_const_grf = fs_setup_payloads(fcc); 1843 fcc->first_attr_grf = fcc->first_const_grf + num_consts; 1844 fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2; 1845 fcc->last_free_grf = 127; 1846 1847 /* m0 is reserved for system routines */ 1848 fcc->first_free_mrf = 1; 1849 fcc->last_free_mrf = 15; 1850 1851 /* instructions are compressed with GEN6_EXECSIZE_16 */ 1852 fcc->num_grf_per_vrf = 1853 (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 
2 : 1; 1854 1855 if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) { 1856 fcc->last_free_grf -= 15; 1857 fcc->first_free_mrf = fcc->last_free_grf + 1; 1858 fcc->last_free_mrf = fcc->first_free_mrf + 14; 1859 } 1860 1861 fcc->shader->in.start_grf = fcc->first_const_grf; 1862 fcc->shader->has_kill = fcc->tgsi.uses_kill; 1863 fcc->shader->dispatch_16 = 1864 (fcc->dispatch_mode == GEN6_PS_DISPATCH_16); 1865 1866 fcc->shader->bt.rt_base = 0; 1867 fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs; 1868 /* to send EOT */ 1869 if (!fcc->shader->bt.rt_count) 1870 fcc->shader->bt.rt_count = 1; 1871 1872 fcc->shader->bt.tex_base = fcc->shader->bt.rt_base + 1873 fcc->shader->bt.rt_count; 1874 fcc->shader->bt.tex_count = fcc->variant->num_sampler_views; 1875 1876 fcc->shader->bt.const_base = fcc->shader->bt.tex_base + 1877 fcc->shader->bt.tex_count; 1878 fcc->shader->bt.const_count = state->info.constant_buffer_count; 1879 1880 fcc->shader->bt.total_count = fcc->shader->bt.const_base + 1881 fcc->shader->bt.const_count; 1882 1883 return true; 1884 } 1885 1886 /** 1887 * Compile the fragment shader. 1888 */ 1889 struct ilo_shader * 1890 ilo_shader_compile_fs(const struct ilo_shader_state *state, 1891 const struct ilo_shader_variant *variant) 1892 { 1893 struct fs_compile_context fcc; 1894 1895 if (!fs_setup(&fcc, state, variant)) 1896 return NULL; 1897 1898 fs_write_fb(&fcc); 1899 1900 if (!fs_compile(&fcc)) { 1901 FREE(fcc.shader); 1902 fcc.shader = NULL; 1903 } 1904 1905 toy_tgsi_cleanup(&fcc.tgsi); 1906 toy_compiler_cleanup(&fcc.tc); 1907 1908 return fcc.shader; 1909 } 1910