1 /* 2 * Copyright (c) 2013 Rob Clark <robdclark (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #ifndef IR3_H_ 25 #define IR3_H_ 26 27 #include <stdint.h> 28 #include <stdbool.h> 29 30 #include "util/u_debug.h" 31 #include "util/list.h" 32 33 #include "instr-a3xx.h" 34 #include "disasm.h" /* TODO move 'enum shader_t' somewhere else.. */ 35 36 /* low level intermediate representation of an adreno shader program */ 37 38 struct ir3_compiler; 39 struct ir3; 40 struct ir3_instruction; 41 struct ir3_block; 42 43 struct ir3_info { 44 uint32_t gpu_id; 45 uint16_t sizedwords; 46 uint16_t instrs_count; /* expanded to account for rpt's */ 47 /* NOTE: max_reg, etc, does not include registers not touched 48 * by the shader (ie. vertex fetched via VFD_DECODE but not 49 * touched by shader) 50 */ 51 int8_t max_reg; /* highest GPR # used by shader */ 52 int8_t max_half_reg; 53 int16_t max_const; 54 }; 55 56 struct ir3_register { 57 enum { 58 IR3_REG_CONST = 0x001, 59 IR3_REG_IMMED = 0x002, 60 IR3_REG_HALF = 0x004, 61 IR3_REG_RELATIV= 0x008, 62 IR3_REG_R = 0x010, 63 /* Most instructions, it seems, can do float abs/neg but not 64 * integer. The CP pass needs to know what is intended (int or 65 * float) in order to do the right thing. For this reason the 66 * abs/neg flags are split out into float and int variants. In 67 * addition, .b (bitwise) operations, the negate is actually a 68 * bitwise not, so split that out into a new flag to make it 69 * more clear. 70 */ 71 IR3_REG_FNEG = 0x020, 72 IR3_REG_FABS = 0x040, 73 IR3_REG_SNEG = 0x080, 74 IR3_REG_SABS = 0x100, 75 IR3_REG_BNOT = 0x200, 76 IR3_REG_EVEN = 0x400, 77 IR3_REG_POS_INF= 0x800, 78 /* (ei) flag, end-input? Set on last bary, presumably to signal 79 * that the shader needs no more input: 80 */ 81 IR3_REG_EI = 0x1000, 82 /* meta-flags, for intermediate stages of IR, ie. 83 * before register assignment is done: 84 */ 85 IR3_REG_SSA = 0x2000, /* 'instr' is ptr to assigning instr */ 86 IR3_REG_ARRAY = 0x4000, 87 IR3_REG_PHI_SRC= 0x8000, /* phi src, regs[0]->instr points to phi */ 88 89 } flags; 90 union { 91 /* normal registers: 92 * the component is in the low two bits of the reg #, so 93 * rN.x becomes: (N << 2) | x 94 */ 95 int num; 96 /* immediate: */ 97 int32_t iim_val; 98 uint32_t uim_val; 99 float fim_val; 100 /* relative: */ 101 struct { 102 uint16_t id; 103 int16_t offset; 104 } array; 105 }; 106 107 /* For IR3_REG_SSA, src registers contain ptr back to assigning 108 * instruction. 109 * 110 * For IR3_REG_ARRAY, the pointer is back to the last dependent 111 * array access (although the net effect is the same, it points 112 * back to a previous instruction that we depend on). 113 */ 114 struct ir3_instruction *instr; 115 116 union { 117 /* used for cat5 instructions, but also for internal/IR level 118 * tracking of what registers are read/written by an instruction. 119 * wrmask may be a bad name since it is used to represent both 120 * src and dst that touch multiple adjacent registers. 121 */ 122 unsigned wrmask; 123 /* for relative addressing, 32bits for array size is too small, 124 * but otoh we don't need to deal with disjoint sets, so instead 125 * use a simple size field (number of scalar components). 126 */ 127 unsigned size; 128 }; 129 }; 130 131 struct ir3_instruction { 132 struct ir3_block *block; 133 opc_t opc; 134 enum { 135 /* (sy) flag is set on first instruction, and after sample 136 * instructions (probably just on RAW hazard). 137 */ 138 IR3_INSTR_SY = 0x001, 139 /* (ss) flag is set on first instruction, and first instruction 140 * to depend on the result of "long" instructions (RAW hazard): 141 * 142 * rcp, rsq, log2, exp2, sin, cos, sqrt 143 * 144 * It seems to synchronize until all in-flight instructions are 145 * completed, for example: 146 * 147 * rsq hr1.w, hr1.w 148 * add.f hr2.z, (neg)hr2.z, hc0.y 149 * mul.f hr2.w, (neg)hr2.y, (neg)hr2.y 150 * rsq hr2.x, hr2.x 151 * (rpt1)nop 152 * mad.f16 hr2.w, hr2.z, hr2.z, hr2.w 153 * nop 154 * mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w 155 * (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w 156 * (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x 157 * 158 * The last mul.f does not have (ss) set, presumably because the 159 * (ss) on the previous instruction does the job. 160 * 161 * The blob driver also seems to set it on WAR hazards, although 162 * not really clear if this is needed or just blob compiler being 163 * sloppy. So far I haven't found a case where removing the (ss) 164 * causes problems for WAR hazard, but I could just be getting 165 * lucky: 166 * 167 * rcp r1.y, r3.y 168 * (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z 169 * 170 */ 171 IR3_INSTR_SS = 0x002, 172 /* (jp) flag is set on jump targets: 173 */ 174 IR3_INSTR_JP = 0x004, 175 IR3_INSTR_UL = 0x008, 176 IR3_INSTR_3D = 0x010, 177 IR3_INSTR_A = 0x020, 178 IR3_INSTR_O = 0x040, 179 IR3_INSTR_P = 0x080, 180 IR3_INSTR_S = 0x100, 181 IR3_INSTR_S2EN = 0x200, 182 IR3_INSTR_G = 0x400, 183 /* meta-flags, for intermediate stages of IR, ie. 184 * before register assignment is done: 185 */ 186 IR3_INSTR_MARK = 0x1000, 187 IR3_INSTR_UNUSED= 0x2000, 188 } flags; 189 int repeat; 190 #ifdef DEBUG 191 unsigned regs_max; 192 #endif 193 unsigned regs_count; 194 struct ir3_register **regs; 195 union { 196 struct { 197 char inv; 198 char comp; 199 int immed; 200 struct ir3_block *target; 201 } cat0; 202 struct { 203 type_t src_type, dst_type; 204 } cat1; 205 struct { 206 enum { 207 IR3_COND_LT = 0, 208 IR3_COND_LE = 1, 209 IR3_COND_GT = 2, 210 IR3_COND_GE = 3, 211 IR3_COND_EQ = 4, 212 IR3_COND_NE = 5, 213 } condition; 214 } cat2; 215 struct { 216 unsigned samp, tex; 217 type_t type; 218 } cat5; 219 struct { 220 type_t type; 221 int src_offset; 222 int dst_offset; 223 int iim_val; 224 } cat6; 225 /* for meta-instructions, just used to hold extra data 226 * before instruction scheduling, etc 227 */ 228 struct { 229 int off; /* component/offset */ 230 } fo; 231 struct { 232 /* used to temporarily hold reference to nir_phi_instr 233 * until we resolve the phi srcs 234 */ 235 void *nphi; 236 } phi; 237 struct { 238 struct ir3_block *block; 239 } inout; 240 }; 241 242 /* transient values used during various algorithms: */ 243 union { 244 /* The instruction depth is the max dependency distance to output. 245 * 246 * You can also think of it as the "cost", if we did any sort of 247 * optimization for register footprint. Ie. a value that is just 248 * result of moving a const to a reg would have a low cost, so to 249 * it could make sense to duplicate the instruction at various 250 * points where the result is needed to reduce register footprint. 251 */ 252 unsigned depth; 253 /* When we get to the RA stage, we no longer need depth, but 254 * we do need instruction's position/name: 255 */ 256 struct { 257 uint16_t ip; 258 uint16_t name; 259 }; 260 }; 261 262 /* used for per-pass extra instruction data. 263 */ 264 void *data; 265 266 /* Used during CP and RA stages. For fanin and shader inputs/ 267 * outputs where we need a sequence of consecutive registers, 268 * keep track of each src instructions left (ie 'n-1') and right 269 * (ie 'n+1') neighbor. The front-end must insert enough mov's 270 * to ensure that each instruction has at most one left and at 271 * most one right neighbor. During the copy-propagation pass, 272 * we only remove mov's when we can preserve this constraint. 273 * And during the RA stage, we use the neighbor information to 274 * allocate a block of registers in one shot. 275 * 276 * TODO: maybe just add something like: 277 * struct ir3_instruction_ref { 278 * struct ir3_instruction *instr; 279 * unsigned cnt; 280 * } 281 * 282 * Or can we get away without the refcnt stuff? It seems like 283 * it should be overkill.. the problem is if, potentially after 284 * already eliminating some mov's, if you have a single mov that 285 * needs to be grouped with it's neighbors in two different 286 * places (ex. shader output and a fanin). 287 */ 288 struct { 289 struct ir3_instruction *left, *right; 290 uint16_t left_cnt, right_cnt; 291 } cp; 292 293 /* an instruction can reference at most one address register amongst 294 * it's src/dst registers. Beyond that, you need to insert mov's. 295 * 296 * NOTE: do not write this directly, use ir3_instr_set_address() 297 */ 298 struct ir3_instruction *address; 299 300 /* Entry in ir3_block's instruction list: */ 301 struct list_head node; 302 303 #ifdef DEBUG 304 uint32_t serialno; 305 #endif 306 }; 307 308 static inline struct ir3_instruction * 309 ir3_neighbor_first(struct ir3_instruction *instr) 310 { 311 int cnt = 0; 312 while (instr->cp.left) { 313 instr = instr->cp.left; 314 if (++cnt > 0xffff) { 315 debug_assert(0); 316 break; 317 } 318 } 319 return instr; 320 } 321 322 static inline int ir3_neighbor_count(struct ir3_instruction *instr) 323 { 324 int num = 1; 325 326 debug_assert(!instr->cp.left); 327 328 while (instr->cp.right) { 329 num++; 330 instr = instr->cp.right; 331 if (num > 0xffff) { 332 debug_assert(0); 333 break; 334 } 335 } 336 337 return num; 338 } 339 340 struct ir3 { 341 struct ir3_compiler *compiler; 342 343 unsigned ninputs, noutputs; 344 struct ir3_instruction **inputs; 345 struct ir3_instruction **outputs; 346 347 /* Track bary.f (and ldlv) instructions.. this is needed in 348 * scheduling to ensure that all varying fetches happen before 349 * any potential kill instructions. The hw gets grumpy if all 350 * threads in a group are killed before the last bary.f gets 351 * a chance to signal end of input (ei). 352 */ 353 unsigned baryfs_count, baryfs_sz; 354 struct ir3_instruction **baryfs; 355 356 /* Track all indirect instructions (read and write). To avoid 357 * deadlock scenario where an address register gets scheduled, 358 * but other dependent src instructions cannot be scheduled due 359 * to dependency on a *different* address register value, the 360 * scheduler needs to ensure that all dependencies other than 361 * the instruction other than the address register are scheduled 362 * before the one that writes the address register. Having a 363 * convenient list of instructions that reference some address 364 * register simplifies this. 365 */ 366 unsigned indirects_count, indirects_sz; 367 struct ir3_instruction **indirects; 368 /* and same for instructions that consume predicate register: */ 369 unsigned predicates_count, predicates_sz; 370 struct ir3_instruction **predicates; 371 372 /* Track instructions which do not write a register but other- 373 * wise must not be discarded (such as kill, stg, etc) 374 */ 375 unsigned keeps_count, keeps_sz; 376 struct ir3_instruction **keeps; 377 378 /* Track texture sample instructions which need texture state 379 * patched in (for astc-srgb workaround): 380 */ 381 unsigned astc_srgb_count, astc_srgb_sz; 382 struct ir3_instruction **astc_srgb; 383 384 /* List of blocks: */ 385 struct list_head block_list; 386 387 /* List of ir3_array's: */ 388 struct list_head array_list; 389 }; 390 391 typedef struct nir_variable nir_variable; 392 393 struct ir3_array { 394 struct list_head node; 395 unsigned length; 396 unsigned id; 397 398 nir_variable *var; 399 400 /* We track the last write and last access (read or write) to 401 * setup dependencies on instructions that read or write the 402 * array. Reads can be re-ordered wrt. other reads, but should 403 * not be re-ordered wrt. to writes. Writes cannot be reordered 404 * wrt. any other access to the array. 405 * 406 * So array reads depend on last write, and array writes depend 407 * on the last access. 408 */ 409 struct ir3_instruction *last_write, *last_access; 410 411 /* extra stuff used in RA pass: */ 412 unsigned base; /* base vreg name */ 413 unsigned reg; /* base physical reg */ 414 uint16_t start_ip, end_ip; 415 }; 416 417 struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id); 418 419 typedef struct nir_block nir_block; 420 421 struct ir3_block { 422 struct list_head node; 423 struct ir3 *shader; 424 425 nir_block *nblock; 426 427 struct list_head instr_list; /* list of ir3_instruction */ 428 429 /* each block has either one or two successors.. in case of 430 * two successors, 'condition' decides which one to follow. 431 * A block preceding an if/else has two successors. 432 */ 433 struct ir3_instruction *condition; 434 struct ir3_block *successors[2]; 435 436 uint16_t start_ip, end_ip; 437 438 /* used for per-pass extra block data. Mainly used right 439 * now in RA step to track livein/liveout. 440 */ 441 void *data; 442 443 #ifdef DEBUG 444 uint32_t serialno; 445 #endif 446 }; 447 448 static inline uint32_t 449 block_id(struct ir3_block *block) 450 { 451 #ifdef DEBUG 452 return block->serialno; 453 #else 454 return (uint32_t)(unsigned long)block; 455 #endif 456 } 457 458 struct ir3 * ir3_create(struct ir3_compiler *compiler, 459 unsigned nin, unsigned nout); 460 void ir3_destroy(struct ir3 *shader); 461 void * ir3_assemble(struct ir3 *shader, 462 struct ir3_info *info, uint32_t gpu_id); 463 void * ir3_alloc(struct ir3 *shader, int sz); 464 465 struct ir3_block * ir3_block_create(struct ir3 *shader); 466 467 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc); 468 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, 469 opc_t opc, int nreg); 470 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr); 471 const char *ir3_instr_name(struct ir3_instruction *instr); 472 473 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, 474 int num, int flags); 475 struct ir3_register * ir3_reg_clone(struct ir3 *shader, 476 struct ir3_register *reg); 477 478 void ir3_instr_set_address(struct ir3_instruction *instr, 479 struct ir3_instruction *addr); 480 481 static inline bool ir3_instr_check_mark(struct ir3_instruction *instr) 482 { 483 if (instr->flags & IR3_INSTR_MARK) 484 return true; /* already visited */ 485 instr->flags |= IR3_INSTR_MARK; 486 return false; 487 } 488 489 void ir3_block_clear_mark(struct ir3_block *block); 490 void ir3_clear_mark(struct ir3 *shader); 491 492 unsigned ir3_count_instructions(struct ir3 *ir); 493 494 static inline int ir3_instr_regno(struct ir3_instruction *instr, 495 struct ir3_register *reg) 496 { 497 unsigned i; 498 for (i = 0; i < instr->regs_count; i++) 499 if (reg == instr->regs[i]) 500 return i; 501 return -1; 502 } 503 504 505 #define MAX_ARRAYS 16 506 507 /* comp: 508 * 0 - x 509 * 1 - y 510 * 2 - z 511 * 3 - w 512 */ 513 static inline uint32_t regid(int num, int comp) 514 { 515 return (num << 2) | (comp & 0x3); 516 } 517 518 static inline uint32_t reg_num(struct ir3_register *reg) 519 { 520 return reg->num >> 2; 521 } 522 523 static inline uint32_t reg_comp(struct ir3_register *reg) 524 { 525 return reg->num & 0x3; 526 } 527 528 static inline bool is_flow(struct ir3_instruction *instr) 529 { 530 return (opc_cat(instr->opc) == 0); 531 } 532 533 static inline bool is_kill(struct ir3_instruction *instr) 534 { 535 return instr->opc == OPC_KILL; 536 } 537 538 static inline bool is_nop(struct ir3_instruction *instr) 539 { 540 return instr->opc == OPC_NOP; 541 } 542 543 /* Is it a non-transformative (ie. not type changing) mov? This can 544 * also include absneg.s/absneg.f, which for the most part can be 545 * treated as a mov (single src argument). 546 */ 547 static inline bool is_same_type_mov(struct ir3_instruction *instr) 548 { 549 struct ir3_register *dst = instr->regs[0]; 550 551 /* mov's that write to a0.x or p0.x are special: */ 552 if (dst->num == regid(REG_P0, 0)) 553 return false; 554 if (dst->num == regid(REG_A0, 0)) 555 return false; 556 557 if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY)) 558 return false; 559 560 switch (instr->opc) { 561 case OPC_MOV: 562 return instr->cat1.src_type == instr->cat1.dst_type; 563 case OPC_ABSNEG_F: 564 case OPC_ABSNEG_S: 565 return true; 566 default: 567 return false; 568 } 569 } 570 571 static inline bool is_alu(struct ir3_instruction *instr) 572 { 573 return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3); 574 } 575 576 static inline bool is_sfu(struct ir3_instruction *instr) 577 { 578 return (opc_cat(instr->opc) == 4); 579 } 580 581 static inline bool is_tex(struct ir3_instruction *instr) 582 { 583 return (opc_cat(instr->opc) == 5); 584 } 585 586 static inline bool is_mem(struct ir3_instruction *instr) 587 { 588 return (opc_cat(instr->opc) == 6); 589 } 590 591 static inline bool 592 is_store(struct ir3_instruction *instr) 593 { 594 /* these instructions, the "destination" register is 595 * actually a source, the address to store to. 596 */ 597 switch (instr->opc) { 598 case OPC_STG: 599 case OPC_STP: 600 case OPC_STL: 601 case OPC_STLW: 602 case OPC_L2G: 603 case OPC_G2L: 604 return true; 605 default: 606 return false; 607 } 608 } 609 610 static inline bool is_load(struct ir3_instruction *instr) 611 { 612 switch (instr->opc) { 613 case OPC_LDG: 614 case OPC_LDL: 615 case OPC_LDP: 616 case OPC_L2G: 617 case OPC_LDLW: 618 case OPC_LDC_4: 619 case OPC_LDLV: 620 /* probably some others too.. */ 621 return true; 622 default: 623 return false; 624 } 625 } 626 627 static inline bool is_input(struct ir3_instruction *instr) 628 { 629 /* in some cases, ldlv is used to fetch varying without 630 * interpolation.. fortunately inloc is the first src 631 * register in either case 632 */ 633 switch (instr->opc) { 634 case OPC_LDLV: 635 case OPC_BARY_F: 636 return true; 637 default: 638 return false; 639 } 640 } 641 642 static inline bool is_bool(struct ir3_instruction *instr) 643 { 644 switch (instr->opc) { 645 case OPC_CMPS_F: 646 case OPC_CMPS_S: 647 case OPC_CMPS_U: 648 return true; 649 default: 650 return false; 651 } 652 } 653 654 static inline bool is_meta(struct ir3_instruction *instr) 655 { 656 /* TODO how should we count PHI (and maybe fan-in/out) which 657 * might actually contribute some instructions to the final 658 * result? 659 */ 660 return (opc_cat(instr->opc) == -1); 661 } 662 663 static inline bool writes_addr(struct ir3_instruction *instr) 664 { 665 if (instr->regs_count > 0) { 666 struct ir3_register *dst = instr->regs[0]; 667 return reg_num(dst) == REG_A0; 668 } 669 return false; 670 } 671 672 static inline bool writes_pred(struct ir3_instruction *instr) 673 { 674 if (instr->regs_count > 0) { 675 struct ir3_register *dst = instr->regs[0]; 676 return reg_num(dst) == REG_P0; 677 } 678 return false; 679 } 680 681 /* returns defining instruction for reg */ 682 /* TODO better name */ 683 static inline struct ir3_instruction *ssa(struct ir3_register *reg) 684 { 685 if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) { 686 debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED))); 687 return reg->instr; 688 } 689 return NULL; 690 } 691 692 static inline bool conflicts(struct ir3_instruction *a, 693 struct ir3_instruction *b) 694 { 695 return (a && b) && (a != b); 696 } 697 698 static inline bool reg_gpr(struct ir3_register *r) 699 { 700 if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED)) 701 return false; 702 if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0)) 703 return false; 704 return true; 705 } 706 707 static inline type_t half_type(type_t type) 708 { 709 switch (type) { 710 case TYPE_F32: return TYPE_F16; 711 case TYPE_U32: return TYPE_U16; 712 case TYPE_S32: return TYPE_S16; 713 case TYPE_F16: 714 case TYPE_U16: 715 case TYPE_S16: 716 return type; 717 default: 718 assert(0); 719 return ~0; 720 } 721 } 722 723 /* some cat2 instructions (ie. those which are not float) can embed an 724 * immediate: 725 */ 726 static inline bool ir3_cat2_int(opc_t opc) 727 { 728 switch (opc) { 729 case OPC_ADD_U: 730 case OPC_ADD_S: 731 case OPC_SUB_U: 732 case OPC_SUB_S: 733 case OPC_CMPS_U: 734 case OPC_CMPS_S: 735 case OPC_MIN_U: 736 case OPC_MIN_S: 737 case OPC_MAX_U: 738 case OPC_MAX_S: 739 case OPC_CMPV_U: 740 case OPC_CMPV_S: 741 case OPC_MUL_U: 742 case OPC_MUL_S: 743 case OPC_MULL_U: 744 case OPC_CLZ_S: 745 case OPC_ABSNEG_S: 746 case OPC_AND_B: 747 case OPC_OR_B: 748 case OPC_NOT_B: 749 case OPC_XOR_B: 750 case OPC_BFREV_B: 751 case OPC_CLZ_B: 752 case OPC_SHL_B: 753 case OPC_SHR_B: 754 case OPC_ASHR_B: 755 case OPC_MGEN_B: 756 case OPC_GETBIT_B: 757 case OPC_CBITS_B: 758 case OPC_BARY_F: 759 return true; 760 761 default: 762 return false; 763 } 764 } 765 766 767 /* map cat2 instruction to valid abs/neg flags: */ 768 static inline unsigned ir3_cat2_absneg(opc_t opc) 769 { 770 switch (opc) { 771 case OPC_ADD_F: 772 case OPC_MIN_F: 773 case OPC_MAX_F: 774 case OPC_MUL_F: 775 case OPC_SIGN_F: 776 case OPC_CMPS_F: 777 case OPC_ABSNEG_F: 778 case OPC_CMPV_F: 779 case OPC_FLOOR_F: 780 case OPC_CEIL_F: 781 case OPC_RNDNE_F: 782 case OPC_RNDAZ_F: 783 case OPC_TRUNC_F: 784 case OPC_BARY_F: 785 return IR3_REG_FABS | IR3_REG_FNEG; 786 787 case OPC_ADD_U: 788 case OPC_ADD_S: 789 case OPC_SUB_U: 790 case OPC_SUB_S: 791 case OPC_CMPS_U: 792 case OPC_CMPS_S: 793 case OPC_MIN_U: 794 case OPC_MIN_S: 795 case OPC_MAX_U: 796 case OPC_MAX_S: 797 case OPC_CMPV_U: 798 case OPC_CMPV_S: 799 case OPC_MUL_U: 800 case OPC_MUL_S: 801 case OPC_MULL_U: 802 case OPC_CLZ_S: 803 return 0; 804 805 case OPC_ABSNEG_S: 806 return IR3_REG_SABS | IR3_REG_SNEG; 807 808 case OPC_AND_B: 809 case OPC_OR_B: 810 case OPC_NOT_B: 811 case OPC_XOR_B: 812 case OPC_BFREV_B: 813 case OPC_CLZ_B: 814 case OPC_SHL_B: 815 case OPC_SHR_B: 816 case OPC_ASHR_B: 817 case OPC_MGEN_B: 818 case OPC_GETBIT_B: 819 case OPC_CBITS_B: 820 return IR3_REG_BNOT; 821 822 default: 823 return 0; 824 } 825 } 826 827 /* map cat3 instructions to valid abs/neg flags: */ 828 static inline unsigned ir3_cat3_absneg(opc_t opc) 829 { 830 switch (opc) { 831 case OPC_MAD_F16: 832 case OPC_MAD_F32: 833 case OPC_SEL_F16: 834 case OPC_SEL_F32: 835 return IR3_REG_FNEG; 836 837 case OPC_MAD_U16: 838 case OPC_MADSH_U16: 839 case OPC_MAD_S16: 840 case OPC_MADSH_M16: 841 case OPC_MAD_U24: 842 case OPC_MAD_S24: 843 case OPC_SEL_S16: 844 case OPC_SEL_S32: 845 case OPC_SAD_S16: 846 case OPC_SAD_S32: 847 /* neg *may* work on 3rd src.. */ 848 849 case OPC_SEL_B16: 850 case OPC_SEL_B32: 851 852 default: 853 return 0; 854 } 855 } 856 857 #define array_insert(arr, val) do { \ 858 if (arr ## _count == arr ## _sz) { \ 859 arr ## _sz = MAX2(2 * arr ## _sz, 16); \ 860 arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \ 861 } \ 862 arr[arr ##_count++] = val; \ 863 } while (0) 864 865 /* iterator for an instructions's sources (reg), also returns src #: */ 866 #define foreach_src_n(__srcreg, __n, __instr) \ 867 if ((__instr)->regs_count) \ 868 for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \ 869 if ((__srcreg = (__instr)->regs[__n + 1])) 870 871 /* iterator for an instructions's sources (reg): */ 872 #define foreach_src(__srcreg, __instr) \ 873 foreach_src_n(__srcreg, __i, __instr) 874 875 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr) 876 { 877 if (instr->address) 878 return instr->regs_count + 1; 879 return instr->regs_count; 880 } 881 882 static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n) 883 { 884 if (n == (instr->regs_count + 0)) 885 return instr->address; 886 return ssa(instr->regs[n]); 887 } 888 889 #define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1) 890 891 /* iterator for an instruction's SSA sources (instr), also returns src #: */ 892 #define foreach_ssa_src_n(__srcinst, __n, __instr) \ 893 if ((__instr)->regs_count) \ 894 for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt; __n++) \ 895 if ((__srcinst = __ssa_src_n(__instr, __n))) 896 897 /* iterator for an instruction's SSA sources (instr): */ 898 #define foreach_ssa_src(__srcinst, __instr) \ 899 foreach_ssa_src_n(__srcinst, __i, __instr) 900 901 902 /* dump: */ 903 void ir3_print(struct ir3 *ir); 904 void ir3_print_instr(struct ir3_instruction *instr); 905 906 /* depth calculation: */ 907 int ir3_delayslots(struct ir3_instruction *assigner, 908 struct ir3_instruction *consumer, unsigned n); 909 void ir3_insert_by_depth(struct ir3_instruction *instr, struct list_head *list); 910 void ir3_depth(struct ir3 *ir); 911 912 /* copy-propagate: */ 913 struct ir3_shader_variant; 914 void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so); 915 916 /* group neighbors and insert mov's to resolve conflicts: */ 917 void ir3_group(struct ir3 *ir); 918 919 /* scheduling: */ 920 int ir3_sched(struct ir3 *ir); 921 922 /* register assignment: */ 923 struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(void *memctx); 924 int ir3_ra(struct ir3 *ir3, enum shader_t type, 925 bool frag_coord, bool frag_face); 926 927 /* legalize: */ 928 void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary); 929 930 /* ************************************************************************* */ 931 /* instruction helpers */ 932 933 static inline struct ir3_instruction * 934 ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type) 935 { 936 struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); 937 ir3_reg_create(instr, 0, 0); /* dst */ 938 if (src->regs[0]->flags & IR3_REG_ARRAY) { 939 struct ir3_register *src_reg = 940 ir3_reg_create(instr, 0, IR3_REG_ARRAY); 941 src_reg->array = src->regs[0]->array; 942 src_reg->instr = src; 943 } else { 944 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; 945 } 946 debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV)); 947 instr->cat1.src_type = type; 948 instr->cat1.dst_type = type; 949 return instr; 950 } 951 952 static inline struct ir3_instruction * 953 ir3_COV(struct ir3_block *block, struct ir3_instruction *src, 954 type_t src_type, type_t dst_type) 955 { 956 struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV); 957 ir3_reg_create(instr, 0, 0); /* dst */ 958 ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src; 959 instr->cat1.src_type = src_type; 960 instr->cat1.dst_type = dst_type; 961 debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY)); 962 return instr; 963 } 964 965 static inline struct ir3_instruction * 966 ir3_NOP(struct ir3_block *block) 967 { 968 return ir3_instr_create(block, OPC_NOP); 969 } 970 971 #define INSTR0(name) \ 972 static inline struct ir3_instruction * \ 973 ir3_##name(struct ir3_block *block) \ 974 { \ 975 struct ir3_instruction *instr = \ 976 ir3_instr_create(block, OPC_##name); \ 977 return instr; \ 978 } 979 980 #define INSTR1(name) \ 981 static inline struct ir3_instruction * \ 982 ir3_##name(struct ir3_block *block, \ 983 struct ir3_instruction *a, unsigned aflags) \ 984 { \ 985 struct ir3_instruction *instr = \ 986 ir3_instr_create(block, OPC_##name); \ 987 ir3_reg_create(instr, 0, 0); /* dst */ \ 988 ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ 989 return instr; \ 990 } 991 992 #define INSTR2(name) \ 993 static inline struct ir3_instruction * \ 994 ir3_##name(struct ir3_block *block, \ 995 struct ir3_instruction *a, unsigned aflags, \ 996 struct ir3_instruction *b, unsigned bflags) \ 997 { \ 998 struct ir3_instruction *instr = \ 999 ir3_instr_create(block, OPC_##name); \ 1000 ir3_reg_create(instr, 0, 0); /* dst */ \ 1001 ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ 1002 ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ 1003 return instr; \ 1004 } 1005 1006 #define INSTR3(name) \ 1007 static inline struct ir3_instruction * \ 1008 ir3_##name(struct ir3_block *block, \ 1009 struct ir3_instruction *a, unsigned aflags, \ 1010 struct ir3_instruction *b, unsigned bflags, \ 1011 struct ir3_instruction *c, unsigned cflags) \ 1012 { \ 1013 struct ir3_instruction *instr = \ 1014 ir3_instr_create(block, OPC_##name); \ 1015 ir3_reg_create(instr, 0, 0); /* dst */ \ 1016 ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a; \ 1017 ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b; \ 1018 ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c; \ 1019 return instr; \ 1020 } 1021 1022 /* cat0 instructions: */ 1023 INSTR0(BR); 1024 INSTR0(JUMP); 1025 INSTR1(KILL); 1026 INSTR0(END); 1027 1028 /* cat2 instructions, most 2 src but some 1 src: */ 1029 INSTR2(ADD_F) 1030 INSTR2(MIN_F) 1031 INSTR2(MAX_F) 1032 INSTR2(MUL_F) 1033 INSTR1(SIGN_F) 1034 INSTR2(CMPS_F) 1035 INSTR1(ABSNEG_F) 1036 INSTR2(CMPV_F) 1037 INSTR1(FLOOR_F) 1038 INSTR1(CEIL_F) 1039 INSTR1(RNDNE_F) 1040 INSTR1(RNDAZ_F) 1041 INSTR1(TRUNC_F) 1042 INSTR2(ADD_U) 1043 INSTR2(ADD_S) 1044 INSTR2(SUB_U) 1045 INSTR2(SUB_S) 1046 INSTR2(CMPS_U) 1047 INSTR2(CMPS_S) 1048 INSTR2(MIN_U) 1049 INSTR2(MIN_S) 1050 INSTR2(MAX_U) 1051 INSTR2(MAX_S) 1052 INSTR1(ABSNEG_S) 1053 INSTR2(AND_B) 1054 INSTR2(OR_B) 1055 INSTR1(NOT_B) 1056 INSTR2(XOR_B) 1057 INSTR2(CMPV_U) 1058 INSTR2(CMPV_S) 1059 INSTR2(MUL_U) 1060 INSTR2(MUL_S) 1061 INSTR2(MULL_U) 1062 INSTR1(BFREV_B) 1063 INSTR1(CLZ_S) 1064 INSTR1(CLZ_B) 1065 INSTR2(SHL_B) 1066 INSTR2(SHR_B) 1067 INSTR2(ASHR_B) 1068 INSTR2(BARY_F) 1069 INSTR2(MGEN_B) 1070 INSTR2(GETBIT_B) 1071 INSTR1(SETRM) 1072 INSTR1(CBITS_B) 1073 INSTR2(SHB) 1074 INSTR2(MSAD) 1075 1076 /* cat3 instructions: */ 1077 INSTR3(MAD_U16) 1078 INSTR3(MADSH_U16) 1079 INSTR3(MAD_S16) 1080 INSTR3(MADSH_M16) 1081 INSTR3(MAD_U24) 1082 INSTR3(MAD_S24) 1083 INSTR3(MAD_F16) 1084 INSTR3(MAD_F32) 1085 INSTR3(SEL_B16) 1086 INSTR3(SEL_B32) 1087 INSTR3(SEL_S16) 1088 INSTR3(SEL_S32) 1089 INSTR3(SEL_F16) 1090 INSTR3(SEL_F32) 1091 INSTR3(SAD_S16) 1092 INSTR3(SAD_S32) 1093 1094 /* cat4 instructions: */ 1095 INSTR1(RCP) 1096 INSTR1(RSQ) 1097 INSTR1(LOG2) 1098 INSTR1(EXP2) 1099 INSTR1(SIN) 1100 INSTR1(COS) 1101 INSTR1(SQRT) 1102 1103 /* cat5 instructions: */ 1104 INSTR1(DSX) 1105 INSTR1(DSY) 1106 1107 static inline struct ir3_instruction * 1108 ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, 1109 unsigned wrmask, unsigned flags, unsigned samp, unsigned tex, 1110 struct ir3_instruction *src0, struct ir3_instruction *src1) 1111 { 1112 struct ir3_instruction *sam; 1113 struct ir3_register *reg; 1114 1115 sam = ir3_instr_create(block, opc); 1116 sam->flags |= flags; 1117 ir3_reg_create(sam, 0, 0)->wrmask = wrmask; 1118 if (src0) { 1119 reg = ir3_reg_create(sam, 0, IR3_REG_SSA); 1120 reg->wrmask = (1 << (src0->regs_count - 1)) - 1; 1121 reg->instr = src0; 1122 } 1123 if (src1) { 1124 reg = ir3_reg_create(sam, 0, IR3_REG_SSA); 1125 reg->instr = src1; 1126 reg->wrmask = (1 << (src1->regs_count - 1)) - 1; 1127 } 1128 sam->cat5.samp = samp; 1129 sam->cat5.tex = tex; 1130 sam->cat5.type = type; 1131 1132 return sam; 1133 } 1134 1135 /* cat6 instructions: */ 1136 INSTR2(LDLV) 1137 INSTR2(LDG) 1138 INSTR3(STG) 1139 1140 /* ************************************************************************* */ 1141 /* split this out or find some helper to use.. like main/bitset.h.. */ 1142 1143 #include <string.h> 1144 1145 #define MAX_REG 256 1146 1147 typedef uint8_t regmask_t[2 * MAX_REG / 8]; 1148 1149 static inline unsigned regmask_idx(struct ir3_register *reg) 1150 { 1151 unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num; 1152 debug_assert(num < MAX_REG); 1153 if (reg->flags & IR3_REG_HALF) 1154 num += MAX_REG; 1155 return num; 1156 } 1157 1158 static inline void regmask_init(regmask_t *regmask) 1159 { 1160 memset(regmask, 0, sizeof(*regmask)); 1161 } 1162 1163 static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg) 1164 { 1165 unsigned idx = regmask_idx(reg); 1166 if (reg->flags & IR3_REG_RELATIV) { 1167 unsigned i; 1168 for (i = 0; i < reg->size; i++, idx++) 1169 (*regmask)[idx / 8] |= 1 << (idx % 8); 1170 } else { 1171 unsigned mask; 1172 for (mask = reg->wrmask; mask; mask >>= 1, idx++) 1173 if (mask & 1) 1174 (*regmask)[idx / 8] |= 1 << (idx % 8); 1175 } 1176 } 1177 1178 static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b) 1179 { 1180 unsigned i; 1181 for (i = 0; i < ARRAY_SIZE(*dst); i++) 1182 (*dst)[i] = (*a)[i] | (*b)[i]; 1183 } 1184 1185 /* set bits in a if not set in b, conceptually: 1186 * a |= (reg & ~b) 1187 */ 1188 static inline void regmask_set_if_not(regmask_t *a, 1189 struct ir3_register *reg, regmask_t *b) 1190 { 1191 unsigned idx = regmask_idx(reg); 1192 if (reg->flags & IR3_REG_RELATIV) { 1193 unsigned i; 1194 for (i = 0; i < reg->size; i++, idx++) 1195 if (!((*b)[idx / 8] & (1 << (idx % 8)))) 1196 (*a)[idx / 8] |= 1 << (idx % 8); 1197 } else { 1198 unsigned mask; 1199 for (mask = reg->wrmask; mask; mask >>= 1, idx++) 1200 if (mask & 1) 1201 if (!((*b)[idx / 8] & (1 << (idx % 8)))) 1202 (*a)[idx / 8] |= 1 << (idx % 8); 1203 } 1204 } 1205 1206 static inline bool regmask_get(regmask_t *regmask, 1207 struct ir3_register *reg) 1208 { 1209 unsigned idx = regmask_idx(reg); 1210 if (reg->flags & IR3_REG_RELATIV) { 1211 unsigned i; 1212 for (i = 0; i < reg->size; i++, idx++) 1213 if ((*regmask)[idx / 8] & (1 << (idx % 8))) 1214 return true; 1215 } else { 1216 unsigned mask; 1217 for (mask = reg->wrmask; mask; mask >>= 1, idx++) 1218 if (mask & 1) 1219 if ((*regmask)[idx / 8] & (1 << (idx % 8))) 1220 return true; 1221 } 1222 return false; 1223 } 1224 1225 /* ************************************************************************* */ 1226 1227 #endif /* IR3_H_ */ 1228