1 /* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 2012-2013 LunarG, Inc. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Authors: 25 * Chia-I Wu <olv (at) lunarg.com> 26 */ 27 28 #include "pipe/p_shader_tokens.h" 29 #include "toy_compiler.h" 30 #include "toy_tgsi.h" 31 #include "toy_helpers.h" 32 #include "toy_legalize.h" 33 34 /** 35 * Lower an instruction to GEN6_OPCODE_SEND(C). 36 */ 37 void 38 toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst, 39 bool sendc, unsigned sfid) 40 { 41 assert(inst->opcode >= 128); 42 43 inst->opcode = (sendc) ? GEN6_OPCODE_SENDC : GEN6_OPCODE_SEND; 44 45 /* thread control is reserved */ 46 assert(inst->thread_ctrl == 0); 47 48 assert(inst->cond_modifier == GEN6_COND_NONE); 49 inst->cond_modifier = sfid; 50 } 51 52 static int 53 math_op_to_func(unsigned opcode) 54 { 55 switch (opcode) { 56 case TOY_OPCODE_INV: return GEN6_MATH_INV; 57 case TOY_OPCODE_LOG: return GEN6_MATH_LOG; 58 case TOY_OPCODE_EXP: return GEN6_MATH_EXP; 59 case TOY_OPCODE_SQRT: return GEN6_MATH_SQRT; 60 case TOY_OPCODE_RSQ: return GEN6_MATH_RSQ; 61 case TOY_OPCODE_SIN: return GEN6_MATH_SIN; 62 case TOY_OPCODE_COS: return GEN6_MATH_COS; 63 case TOY_OPCODE_FDIV: return GEN6_MATH_FDIV; 64 case TOY_OPCODE_POW: return GEN6_MATH_POW; 65 case TOY_OPCODE_INT_DIV_QUOTIENT: return GEN6_MATH_INT_DIV_QUOTIENT; 66 case TOY_OPCODE_INT_DIV_REMAINDER: return GEN6_MATH_INT_DIV_REMAINDER; 67 default: 68 assert(!"unknown math opcode"); 69 return -1; 70 } 71 } 72 73 /** 74 * Lower virtual math opcodes to GEN6_OPCODE_MATH. 75 */ 76 void 77 toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst) 78 { 79 struct toy_dst tmp; 80 int i; 81 82 /* see commit 250770b74d33bb8625c780a74a89477af033d13a */ 83 for (i = 0; i < ARRAY_SIZE(inst->src); i++) { 84 if (tsrc_is_null(inst->src[i])) 85 break; 86 87 /* no swizzling in align1 */ 88 /* XXX how about source modifiers? */ 89 if (toy_file_is_virtual(inst->src[i].file) && 90 !tsrc_is_swizzled(inst->src[i]) && 91 !inst->src[i].absolute && 92 !inst->src[i].negate) 93 continue; 94 95 tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type); 96 tc_MOV(tc, tmp, inst->src[i]); 97 inst->src[i] = tsrc_from(tmp); 98 } 99 100 /* FC[0:3] */ 101 assert(inst->cond_modifier == GEN6_COND_NONE); 102 inst->cond_modifier = math_op_to_func(inst->opcode); 103 /* FC[4:5] */ 104 assert(inst->thread_ctrl == 0); 105 inst->thread_ctrl = 0; 106 107 inst->opcode = GEN6_OPCODE_MATH; 108 tc_move_inst(tc, inst); 109 110 /* no writemask in align1 */ 111 if (inst->dst.writemask != TOY_WRITEMASK_XYZW) { 112 struct toy_dst dst = inst->dst; 113 struct toy_inst *inst2; 114 115 tmp = tc_alloc_tmp(tc); 116 tmp.type = inst->dst.type; 117 inst->dst = tmp; 118 119 inst2 = tc_MOV(tc, dst, tsrc_from(tmp)); 120 inst2->pred_ctrl = inst->pred_ctrl; 121 } 122 } 123 124 static uint32_t 125 absolute_imm(uint32_t imm32, enum toy_type type) 126 { 127 union fi val = { .ui = imm32 }; 128 129 switch (type) { 130 case TOY_TYPE_F: 131 val.f = fabs(val.f); 132 break; 133 case TOY_TYPE_D: 134 if (val.i < 0) 135 val.i = -val.i; 136 break; 137 case TOY_TYPE_W: 138 if ((int16_t) (val.ui & 0xffff) < 0) 139 val.i = -((int16_t) (val.ui & 0xffff)); 140 break; 141 case TOY_TYPE_V: 142 assert(!"cannot take absoulte of immediates of type V"); 143 break; 144 default: 145 break; 146 } 147 148 return val.ui; 149 } 150 151 static uint32_t 152 negate_imm(uint32_t imm32, enum toy_type type) 153 { 154 union fi val = { .ui = imm32 }; 155 156 switch (type) { 157 case TOY_TYPE_F: 158 val.f = -val.f; 159 break; 160 case TOY_TYPE_D: 161 case TOY_TYPE_UD: 162 val.i = -val.i; 163 break; 164 case TOY_TYPE_W: 165 case TOY_TYPE_UW: 166 val.i = -((int16_t) (val.ui & 0xffff)); 167 break; 168 default: 169 assert(!"negate immediate of unknown type"); 170 break; 171 } 172 173 return val.ui; 174 } 175 176 static void 177 validate_imm(struct toy_compiler *tc, struct toy_inst *inst) 178 { 179 bool move_inst = false; 180 int i; 181 182 for (i = 0; i < ARRAY_SIZE(inst->src); i++) { 183 struct toy_dst tmp; 184 185 if (tsrc_is_null(inst->src[i])) 186 break; 187 188 if (inst->src[i].file != TOY_FILE_IMM) 189 continue; 190 191 if (inst->src[i].absolute) { 192 inst->src[i].val32 = 193 absolute_imm(inst->src[i].val32, inst->src[i].type); 194 inst->src[i].absolute = false; 195 } 196 197 if (inst->src[i].negate) { 198 inst->src[i].val32 = 199 negate_imm(inst->src[i].val32, inst->src[i].type); 200 inst->src[i].negate = false; 201 } 202 203 /* this is the last operand */ 204 if (i + 1 == ARRAY_SIZE(inst->src) || tsrc_is_null(inst->src[i + 1])) 205 break; 206 207 /* need to use a temp if this imm is not the last operand */ 208 /* TODO we should simply swap the operands if the op is commutative */ 209 tmp = tc_alloc_tmp(tc); 210 tmp = tdst_type(tmp, inst->src[i].type); 211 tc_MOV(tc, tmp, inst->src[i]); 212 inst->src[i] = tsrc_from(tmp); 213 214 move_inst = true; 215 } 216 217 if (move_inst) 218 tc_move_inst(tc, inst); 219 } 220 221 static void 222 lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst) 223 { 224 const enum toy_type inst_type = inst->dst.type; 225 const struct toy_dst acc0 = 226 tdst_type(tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0), inst_type); 227 struct toy_inst *inst2; 228 229 /* only need to take care of integer multiplications */ 230 if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) 231 return; 232 233 /* acc0 = (src0 & 0x0000ffff) * src1 */ 234 tc_MUL(tc, acc0, inst->src[0], inst->src[1]); 235 236 /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */ 237 inst2 = tc_add2(tc, GEN6_OPCODE_MACH, tdst_type(tdst_null(), inst_type), 238 inst->src[0], inst->src[1]); 239 inst2->acc_wr_ctrl = true; 240 241 /* dst = acc0 & 0xffffffff */ 242 tc_MOV(tc, inst->dst, tsrc_from(acc0)); 243 244 tc_discard_inst(tc, inst); 245 } 246 247 static void 248 lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst) 249 { 250 const enum toy_type inst_type = inst->dst.type; 251 252 if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) { 253 const struct toy_dst acc0 = tdst(TOY_FILE_ARF, GEN6_ARF_ACC0, 0); 254 255 tc_MOV(tc, acc0, inst->src[2]); 256 inst->src[2] = tsrc_null(); 257 tc_move_inst(tc, inst); 258 } 259 else { 260 struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type); 261 struct toy_inst *inst2; 262 263 inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]); 264 lower_opcode_mul(tc, inst2); 265 266 tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]); 267 268 tc_discard_inst(tc, inst); 269 } 270 } 271 272 /** 273 * Legalize the instructions for register allocation. 274 */ 275 void 276 toy_compiler_legalize_for_ra(struct toy_compiler *tc) 277 { 278 struct toy_inst *inst; 279 280 tc_head(tc); 281 while ((inst = tc_next(tc)) != NULL) { 282 switch (inst->opcode) { 283 case GEN6_OPCODE_MAC: 284 lower_opcode_mac(tc, inst); 285 break; 286 case GEN6_OPCODE_MAD: 287 /* TODO operands must be floats */ 288 break; 289 case GEN6_OPCODE_MUL: 290 lower_opcode_mul(tc, inst); 291 break; 292 default: 293 if (inst->opcode > TOY_OPCODE_LAST_HW) 294 tc_fail(tc, "internal opcodes not lowered"); 295 } 296 } 297 298 /* loop again as the previous pass may add new instructions */ 299 tc_head(tc); 300 while ((inst = tc_next(tc)) != NULL) { 301 validate_imm(tc, inst); 302 } 303 } 304 305 static void 306 patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst) 307 { 308 struct toy_inst *inst2; 309 int nest_level, dist; 310 311 nest_level = 0; 312 dist = -1; 313 314 /* search backward */ 315 LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev, 316 &tc->instructions, list) { 317 if (inst2->marker) { 318 if (inst2->opcode == TOY_OPCODE_DO) { 319 if (nest_level) { 320 nest_level--; 321 } 322 else { 323 /* the following instruction */ 324 dist++; 325 break; 326 } 327 } 328 329 continue; 330 } 331 332 if (inst2->opcode == GEN6_OPCODE_WHILE) 333 nest_level++; 334 335 dist--; 336 } 337 338 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) 339 inst->src[1] = tsrc_imm_d(dist * 16); 340 else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) 341 inst->src[1] = tsrc_imm_w(dist * 2); 342 else 343 inst->dst = tdst_imm_w(dist * 2); 344 } 345 346 static void 347 patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst) 348 { 349 struct toy_inst *inst2; 350 int nest_level, dist; 351 int jip, uip; 352 353 nest_level = 0; 354 dist = 1; 355 jip = 0; 356 uip = 0; 357 358 /* search forward */ 359 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { 360 if (inst2->marker) 361 continue; 362 363 if (inst2->opcode == GEN6_OPCODE_ENDIF) { 364 if (nest_level) { 365 nest_level--; 366 } 367 else { 368 uip = dist * 2; 369 if (!jip) 370 jip = uip; 371 break; 372 } 373 } 374 else if (inst2->opcode == GEN6_OPCODE_ELSE && 375 inst->opcode == GEN6_OPCODE_IF) { 376 if (!nest_level) { 377 /* the following instruction */ 378 jip = (dist + 1) * 2; 379 380 if (ilo_dev_gen(tc->dev) == ILO_GEN(6)) { 381 uip = jip; 382 break; 383 } 384 } 385 } 386 else if (inst2->opcode == GEN6_OPCODE_IF) { 387 nest_level++; 388 } 389 390 dist++; 391 } 392 393 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) { 394 inst->dst.type = TOY_TYPE_D; 395 inst->src[0] = tsrc_imm_d(uip * 8); 396 inst->src[1] = tsrc_imm_d(jip * 8); 397 } else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 398 /* what should the type be? */ 399 inst->dst.type = TOY_TYPE_D; 400 inst->src[0].type = TOY_TYPE_D; 401 inst->src[1] = tsrc_imm_d(uip << 16 | jip); 402 } else { 403 inst->dst = tdst_imm_w(jip); 404 } 405 } 406 407 static void 408 patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst) 409 { 410 struct toy_inst *inst2; 411 bool found = false; 412 int dist = 1; 413 414 /* search forward for instructions that may enable channels */ 415 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { 416 if (inst2->marker) 417 continue; 418 419 switch (inst2->opcode) { 420 case GEN6_OPCODE_ENDIF: 421 case GEN6_OPCODE_ELSE: 422 case GEN6_OPCODE_WHILE: 423 found = true; 424 break; 425 default: 426 break; 427 } 428 429 if (found) 430 break; 431 432 dist++; 433 } 434 435 /* should we set dist to (dist - 1) or 1? */ 436 if (!found) 437 dist = 1; 438 439 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) 440 inst->src[1] = tsrc_imm_d(dist * 16); 441 else if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) 442 inst->src[1] = tsrc_imm_w(dist * 2); 443 else 444 inst->dst = tdst_imm_w(dist * 2); 445 } 446 447 static void 448 patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst) 449 { 450 struct toy_inst *inst2, *inst3; 451 int nest_level, dist, jip, uip; 452 453 nest_level = 0; 454 dist = 1; 455 jip = 1 * 2; 456 uip = 1 * 2; 457 458 /* search forward */ 459 LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) { 460 if (inst2->marker) { 461 if (inst2->opcode == TOY_OPCODE_DO) 462 nest_level++; 463 continue; 464 } 465 466 if (inst2->opcode == GEN6_OPCODE_ELSE || 467 inst2->opcode == GEN6_OPCODE_ENDIF || 468 inst2->opcode == GEN6_OPCODE_WHILE) { 469 jip = dist * 2; 470 break; 471 } 472 473 dist++; 474 } 475 476 /* go on to determine uip */ 477 inst3 = inst2; 478 LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) { 479 if (inst2->marker) { 480 if (inst2->opcode == TOY_OPCODE_DO) 481 nest_level++; 482 continue; 483 } 484 485 if (inst2->opcode == GEN6_OPCODE_WHILE) { 486 if (nest_level) { 487 nest_level--; 488 } 489 else { 490 /* the following instruction */ 491 if (ilo_dev_gen(tc->dev) == ILO_GEN(6) && 492 inst->opcode == GEN6_OPCODE_BREAK) 493 dist++; 494 495 uip = dist * 2; 496 break; 497 } 498 } 499 500 dist++; 501 } 502 503 /* should the type be D or W? */ 504 inst->dst.type = TOY_TYPE_D; 505 if (ilo_dev_gen(tc->dev) >= ILO_GEN(8)) { 506 inst->src[0] = tsrc_imm_d(uip * 8); 507 inst->src[1] = tsrc_imm_d(jip * 8); 508 } else { 509 inst->src[0].type = TOY_TYPE_D; 510 inst->src[1] = tsrc_imm_d(uip << 16 | jip); 511 } 512 } 513 514 /** 515 * Legalize the instructions for assembling. 516 */ 517 void 518 toy_compiler_legalize_for_asm(struct toy_compiler *tc) 519 { 520 struct toy_inst *inst; 521 int pc = 0; 522 523 tc_head(tc); 524 while ((inst = tc_next(tc)) != NULL) { 525 int i; 526 527 pc++; 528 529 /* 530 * From the Sandy Bridge PRM, volume 4 part 2, page 112: 531 * 532 * "Specifically, for instructions with a single source, it only 533 * uses the first source operand <src0>. In this case, the second 534 * source operand <src1> must be set to null and also with the same 535 * type as the first source operand <src0>. It is a special case 536 * when <src0> is an immediate, as an immediate <src0> uses DW3 of 537 * the instruction word, which is normally used by <src1>. In this 538 * case, <src1> must be programmed with register file ARF and the 539 * same data type as <src0>." 540 * 541 * Since we already fill unused operands with null, we only need to take 542 * care of the type. 543 */ 544 if (tsrc_is_null(inst->src[1])) 545 inst->src[1].type = inst->src[0].type; 546 547 switch (inst->opcode) { 548 case GEN6_OPCODE_MATH: 549 /* math does not support align16 nor exec_size > 8 */ 550 inst->access_mode = GEN6_ALIGN_1; 551 552 if (inst->exec_size == GEN6_EXECSIZE_16) { 553 /* 554 * From the Ivy Bridge PRM, volume 4 part 3, page 192: 555 * 556 * "INT DIV function does not support SIMD16." 557 */ 558 if (ilo_dev_gen(tc->dev) < ILO_GEN(7) || 559 inst->cond_modifier == GEN6_MATH_INT_DIV_QUOTIENT || 560 inst->cond_modifier == GEN6_MATH_INT_DIV_REMAINDER) { 561 struct toy_inst *inst2; 562 563 inst->exec_size = GEN6_EXECSIZE_8; 564 inst->qtr_ctrl = GEN6_QTRCTRL_1Q; 565 566 inst2 = tc_duplicate_inst(tc, inst); 567 inst2->qtr_ctrl = GEN6_QTRCTRL_2Q; 568 inst2->dst = tdst_offset(inst2->dst, 1, 0); 569 inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0); 570 if (!tsrc_is_null(inst2->src[1])) 571 inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0); 572 573 pc++; 574 } 575 } 576 break; 577 case GEN6_OPCODE_IF: 578 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7) && 579 inst->cond_modifier != GEN6_COND_NONE) { 580 struct toy_inst *inst2; 581 582 inst2 = tc_duplicate_inst(tc, inst); 583 584 /* replace the original IF by CMP */ 585 inst->opcode = GEN6_OPCODE_CMP; 586 587 /* predicate control instead of condition modifier */ 588 inst2->dst = tdst_null(); 589 inst2->src[0] = tsrc_null(); 590 inst2->src[1] = tsrc_null(); 591 inst2->cond_modifier = GEN6_COND_NONE; 592 inst2->pred_ctrl = GEN6_PREDCTRL_NORMAL; 593 594 pc++; 595 } 596 break; 597 default: 598 break; 599 } 600 601 /* MRF to GRF */ 602 if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { 603 for (i = 0; i < ARRAY_SIZE(inst->src); i++) { 604 if (inst->src[i].file != TOY_FILE_MRF) 605 continue; 606 else if (tsrc_is_null(inst->src[i])) 607 break; 608 609 inst->src[i].file = TOY_FILE_GRF; 610 } 611 612 if (inst->dst.file == TOY_FILE_MRF) 613 inst->dst.file = TOY_FILE_GRF; 614 } 615 } 616 617 tc->num_instructions = pc; 618 619 /* set JIP/UIP */ 620 tc_head(tc); 621 while ((inst = tc_next(tc)) != NULL) { 622 switch (inst->opcode) { 623 case GEN6_OPCODE_IF: 624 case GEN6_OPCODE_ELSE: 625 patch_if_else_jip(tc, inst); 626 break; 627 case GEN6_OPCODE_ENDIF: 628 patch_endif_jip(tc, inst); 629 break; 630 case GEN6_OPCODE_WHILE: 631 patch_while_jip(tc, inst); 632 break; 633 case GEN6_OPCODE_BREAK: 634 case GEN6_OPCODE_CONT: 635 patch_break_continue_jip(tc, inst); 636 break; 637 default: 638 break; 639 } 640 } 641 } 642