Lines Matching defs:alu
41 static inline bool alu_writes(struct r600_bytecode_alu *alu)
43 return alu->dst.write || alu->is_op3;
47 struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
49 return r600_isa_alu(alu->op)->src_count;
53 struct r600_bytecode_alu *alu, unsigned id);
62 LIST_INITHEAD(&cf->alu);
71 struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
73 if (!alu)
75 LIST_INITHEAD(&alu->list);
76 return alu;
178 /* take into account extended alu size */
238 /* alu instructions that can ony exits once per group */
239 static int is_alu_once_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
241 return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
244 static int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
246 return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
247 (r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
250 static int is_alu_mova_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
252 return r600_isa_alu(alu->op)->flags & AF_MOVA;
255 static int alu_uses_rel(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
257 unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
260 if (alu->dst.rel) {
265 if (alu->src[src].rel) {
277 static int alu_uses_lds(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
279 unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
283 if (is_lds_read(alu->src[src].sel)) {
290 static int is_alu_64bit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
292 const struct alu_op_info *op = r600_isa_alu(alu->op);
296 static int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
298 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
302 static int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
304 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
308 /* alu instructions that can execute on any unit */
309 static int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
311 unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
315 static int is_nop_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
317 return alu->op == ALU_OP0_NOP;
323 struct r600_bytecode_alu *alu;
330 for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bytecode_alu, alu->list.next, list)) {
331 chan = alu->dst.chan;
334 else if (is_alu_trans_unit_inst(bc, alu))
336 else if (is_alu_vec_unit_inst(bc, alu))
345 assert(0); /* ALU.Trans has already been allocated. */
348 assignment[4] = alu;
351 assert(0); /* ALU.chan has already been allocated. */
354 assignment[chan] = alu;
357 if (alu->last)
434 /* CB constants start at 512, and get translated to a kcache index when ALU
451 static int check_vector(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
456 num_src = r600_bytecode_get_num_operands(bc, alu);
458 sel = alu->src[src].sel;
459 elem = alu->src[src].chan;
462 if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
472 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
481 static int check_scalar(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
486 num_src = r600_bytecode_get_num_operands(bc, alu);
488 sel = alu->src[src].sel;
489 elem = alu->src[src].chan;
499 r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
505 sel = alu->src[src].sel;
506 elem = alu->src[src].chan;
637 struct r600_bytecode_alu *alu = slots[i];
638 if (!alu)
641 if (is_alu_64bit_inst(bc, alu))
643 num_src = r600_bytecode_get_num_operands(bc, alu);
645 if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
649 if (alu->src[src].sel == gpr[4] &&
650 alu->src[src].chan == chan[4] &&
651 alu_prev->pred_sel == alu->pred_sel) {
652 alu->src[src].sel = V_SQ_ALU_SRC_PS;
653 alu->src[src].chan = 0;
659 if (alu->src[src].sel == gpr[j] &&
660 alu->src[src].chan == j &&
661 alu_prev->pred_sel == alu->pred_sel) {
662 alu->src[src].sel = V_SQ_ALU_SRC_PV;
663 alu->src[src].chan = chan[j];
706 static int r600_bytecode_alu_nliterals(struct r600_bytecode *bc, struct r600_bytecode_alu *alu,
709 unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
713 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
714 uint32_t value = alu->src[i].value;
733 struct r600_bytecode_alu *alu,
736 unsigned num_src = r600_bytecode_get_num_operands(bc, alu);
740 if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
741 uint32_t value = alu->src[i].value;
744 alu->src[i].chan = j;
786 struct r600_bytecode_alu *alu;
852 alu = slots[i];
853 num_once_inst += is_alu_once_inst(bc, alu);
856 if (is_nop_inst(bc, alu))
859 if (is_alu_mova_inst(bc, alu)) {
866 if (alu_uses_rel(bc, alu)) {
873 if (alu->op == ALU_OP0_SET_CF_IDX0 ||
874 alu->op == ALU_OP0_SET_CF_IDX1)
878 num_src = r600_bytecode_get_num_operands(bc, alu);
882 if (!is_gpr(alu->src[src].sel))
890 if (prev[j]->dst.chan == alu->src[src].chan &&
891 (prev[j]->dst.sel == alu->src[src].sel ||
892 prev[j]->dst.rel || alu->src[src].rel))
918 LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
923 LIST_ENTRY(struct r600_bytecode_alu, bc->cf_last->alu.prev, list)->last = 1;
1004 struct r600_bytecode_alu *alu)
1009 unsigned bank, line, sel = alu->src[i].sel, index_mode;
1014 bank = alu->src[i].kc_bank;
1016 index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
1025 struct r600_bytecode_alu *alu,
1033 unsigned int line, sel = alu->src[i].sel, found = 0;
1048 if (kcache[j].bank == alu->src[i].kc_bank &&
1051 alu->src[i].sel = sel - (kcache[j].addr<<4);
1052 alu->src[i].sel += base[j];
1062 struct r600_bytecode_alu *alu,
1071 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1080 if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1102 struct r600_bytecode_alu alu;
1106 memset(&alu, 0, sizeof(alu));
1107 alu.op = ALU_OP0_NOP;
1108 alu.src[0].chan = i;
1109 alu.dst.chan = i;
1110 alu.last = (i == 3);
1111 r = r600_bytecode_add_alu(bc, &alu);
1121 struct r600_bytecode_alu alu;
1131 memset(&alu, 0, sizeof(alu));
1132 alu.op = ALU_OP1_MOVA_GPR_INT;
1133 alu.src[0].sel = bc->ar_reg;
1134 alu.src[0].chan = bc->ar_chan;
1135 alu.last = 1;
1136 alu.index_mode = INDEX_MODE_LOOP;
1137 r = r600_bytecode_add_alu(bc, &alu);
1149 struct r600_bytecode_alu alu;
1162 memset(&alu, 0, sizeof(alu));
1163 alu.op = ALU_OP1_MOVA_INT;
1164 alu.src[0].sel = bc->ar_reg;
1165 alu.src[0].chan = bc->ar_chan;
1166 alu.last = 1;
1167 r = r600_bytecode_add_alu(bc, &alu);
1177 const struct r600_bytecode_alu *alu, unsigned type)
1185 memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
1187 if (alu->is_op3) {
1188 /* will fail later since alu does not support it. */
1189 assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1196 LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
1206 /* cf can contains only alu or only vtx or only tex */
1231 /* Setup the kcache for this ALU instruction. This will start a new
1232 * ALU clause if needed. */
1241 /* number of gpr == the last gpr used in any alu */
1253 LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
1254 /* each alu use 2 dwords */
1258 /* process cur ALU instructions for bank swizzle */
1293 /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
1310 int r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
1312 return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
1354 /* cf can contains only alu or only vtx or only tex */
1421 /* cf can contains only alu or only vtx or only tex */
1559 static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
1561 unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);
1564 bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
1565 S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
1566 S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
1567 S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
1568 S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
1569 S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
1570 S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
1571 S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
1572 S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
1573 S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
1574 S_SQ_ALU_WORD0_LAST(alu->last);
1576 if (alu->is_op3) {
1577 assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1578 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1579 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1580 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1581 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1582 S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
1583 S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
1584 S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
1585 S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
1587 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
1589 bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1590 S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1591 S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1592 S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1593 S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
1594 S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
1595 S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
1596 S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
1598 S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
1599 S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
1600 S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
1682 struct r600_bytecode_alu *alu;
1726 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1727 r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
1730 r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
1731 r600_bytecode_assign_kcache_banks(bc, alu, cf->kcache);
1735 r = r600_bytecode_alu_build(bc, alu, addr);
1738 r = r700_bytecode_alu_build(bc, alu, addr);
1742 r = eg_bytecode_alu_build(bc, alu, addr);
1751 if (alu->last) {
1801 struct r600_bytecode_alu *alu = NULL, *next_alu;
1806 LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1807 free(alu);
1810 LIST_INITHEAD(&cf->alu);
1865 static int print_dst(struct r600_bytecode_alu *alu)
1868 unsigned sel = alu->dst.sel;
1875 if (alu_writes(alu)) {
1877 o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0);
1882 o += print_swizzle(alu->dst.chan);
1886 static int print_src(struct r600_bytecode_alu *alu, unsigned idx)
1889 struct r600_bytecode_alu_src *src = &alu->src[idx];
1984 o += print_sel(sel, src->rel, alu->index_mode, need_brackets);
2010 struct r600_bytecode_alu *alu = NULL;
2162 LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
2164 const struct alu_op_info *aop = r600_isa_alu(alu->op);
2167 r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
2173 o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
2174 alu->update_pred ? 'P':' ',
2175 alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
2178 omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
2181 o += print_dst(alu);
2184 o += print_src(alu, i);
2187 if (alu->bank_swizzle) {
2189 o += fprintf(stderr, " BS:%d", alu->bank_swizzle);
2195 if (alu->last) {
2205 last = alu->last;
2515 struct r600_bytecode_alu alu;
2516 memset(&alu, 0, sizeof(alu));
2517 alu.op = ALU_OP2_MULHI_UINT;
2518 alu.src[0].sel = 0;
2519 alu.src[0].chan = 3;
2520 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2521 alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2522 alu.dst.sel = i + 1;
2523 alu.dst.chan = j;
2524 alu.dst.write = j == 3;
2525 alu.last = j == 3;
2526 if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2532 struct r600_bytecode_alu alu;
2533 memset(&alu, 0, sizeof(alu));
2534 alu.op = ALU_OP2_MULHI_UINT;
2535 alu.src[0].sel = 0;
2536 alu.src[0].chan = 3;
2537 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2538 alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2539 alu.dst.sel = i + 1;
2540 alu.dst.chan = 3;
2541 alu.dst.write = 1;
2542 alu.last = 1;
2543 if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2651 struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
2654 alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
2655 alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
2656 alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
2657 alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
2658 alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
2659 alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
2660 alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
2661 alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
2662 alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
2663 alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
2664 alu->last = G_SQ_ALU_WORD0_LAST(word0);
2667 alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
2668 if (alu->bank_swizzle)
2669 alu->bank_swizzle_force = alu->bank_swizzle;
2670 alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
2671 alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
2672 alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
2673 alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
2676 alu->is_op3 = 1;
2677 alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
2678 alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
2679 alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
2680 alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
2681 alu->op = r600_isa_alu_by_opcode(bc->isa,
2687 alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
2688 alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
2689 alu->op = r600_isa_alu_by_opcode(bc->isa,
2691 alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
2692 alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
2693 alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
2694 alu->execute_mask =