Home | History | Annotate | Download | only in vc4
      1 /*
      2  * Copyright  2014 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <stdbool.h>
     25 #include "util/ralloc.h"
     26 #include "vc4_qir.h"
     27 #include "vc4_qpu.h"
     28 
     29 #define QPU_MUX(mux, muxfield)                                  \
     30         QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? mux : QPU_MUX_B, muxfield)
     31 
     32 static uint64_t
     33 set_src_raddr(uint64_t inst, struct qpu_reg src)
     34 {
     35         if (src.mux == QPU_MUX_A) {
     36                 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
     37                        QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
     38                 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
     39         }
     40 
     41         if (src.mux == QPU_MUX_B) {
     42                 assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
     43                         QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
     44                        QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
     45                 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
     46         }
     47 
     48         if (src.mux == QPU_MUX_SMALL_IMM) {
     49                 if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
     50                         assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
     51                 } else {
     52                         inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
     53                         assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
     54                 }
     55                 return ((inst & ~QPU_RADDR_B_MASK) |
     56                         QPU_SET_FIELD(src.addr, QPU_RADDR_B));
     57         }
     58 
     59         return inst;
     60 }
     61 
     62 uint64_t
     63 qpu_NOP()
     64 {
     65         uint64_t inst = 0;
     66 
     67         inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
     68         inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
     69 
     70         /* Note: These field values are actually non-zero */
     71         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
     72         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
     73         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
     74         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
     75         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
     76 
     77         return inst;
     78 }
     79 
     80 static uint64_t
     81 qpu_a_dst(struct qpu_reg dst)
     82 {
     83         uint64_t inst = 0;
     84 
     85         if (dst.mux <= QPU_MUX_R5) {
     86                 /* Translate the mux to the ACCn values. */
     87                 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
     88         } else {
     89                 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
     90                 if (dst.mux == QPU_MUX_B)
     91                         inst |= QPU_WS;
     92         }
     93 
     94         return inst;
     95 }
     96 
     97 static uint64_t
     98 qpu_m_dst(struct qpu_reg dst)
     99 {
    100         uint64_t inst = 0;
    101 
    102         if (dst.mux <= QPU_MUX_R5) {
    103                 /* Translate the mux to the ACCn values. */
    104                 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
    105         } else {
    106                 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
    107                 if (dst.mux == QPU_MUX_A)
    108                         inst |= QPU_WS;
    109         }
    110 
    111         return inst;
    112 }
    113 
    114 uint64_t
    115 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
    116 {
    117         uint64_t inst = 0;
    118 
    119         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
    120         inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
    121         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
    122         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
    123         inst |= qpu_a_dst(dst);
    124         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
    125         inst |= QPU_MUX(src.mux, QPU_ADD_A);
    126         inst |= QPU_MUX(src.mux, QPU_ADD_B);
    127         inst = set_src_raddr(inst, src);
    128         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
    129 
    130         return inst;
    131 }
    132 
    133 uint64_t
    134 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
    135 {
    136         uint64_t inst = 0;
    137 
    138         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
    139         inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
    140         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
    141         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
    142         inst |= qpu_m_dst(dst);
    143         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
    144         inst |= QPU_MUX(src.mux, QPU_MUL_A);
    145         inst |= QPU_MUX(src.mux, QPU_MUL_B);
    146         inst = set_src_raddr(inst, src);
    147         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
    148 
    149         return inst;
    150 }
    151 
    152 uint64_t
    153 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
    154 {
    155         uint64_t inst = 0;
    156 
    157         inst |= qpu_a_dst(dst);
    158         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
    159         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
    160         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
    161         inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
    162         inst |= val;
    163 
    164         return inst;
    165 }
    166 
    167 uint64_t
    168 qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
    169 {
    170         return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
    171                                                          QPU_LOAD_IMM_MODE);
    172 }
    173 
    174 uint64_t
    175 qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
    176 {
    177         return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
    178                                                          QPU_LOAD_IMM_MODE);
    179 }
    180 
    181 uint64_t
    182 qpu_branch(uint32_t cond, uint32_t target)
    183 {
    184         uint64_t inst = 0;
    185 
    186         inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
    187         inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
    188         inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
    189         inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
    190         inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
    191 
    192         return inst;
    193 }
    194 
    195 uint64_t
    196 qpu_a_alu2(enum qpu_op_add op,
    197            struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
    198 {
    199         uint64_t inst = 0;
    200 
    201         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
    202         inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
    203         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
    204         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
    205         inst |= qpu_a_dst(dst);
    206         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
    207         inst |= QPU_MUX(src0.mux, QPU_ADD_A);
    208         inst = set_src_raddr(inst, src0);
    209         inst |= QPU_MUX(src1.mux, QPU_ADD_B);
    210         inst = set_src_raddr(inst, src1);
    211         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
    212 
    213         return inst;
    214 }
    215 
    216 uint64_t
    217 qpu_m_alu2(enum qpu_op_mul op,
    218            struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
    219 {
    220         uint64_t inst = 0;
    221 
    222         inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
    223         inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
    224         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
    225         inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
    226         inst |= qpu_m_dst(dst);
    227         inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
    228         inst |= QPU_MUX(src0.mux, QPU_MUL_A);
    229         inst = set_src_raddr(inst, src0);
    230         inst |= QPU_MUX(src1.mux, QPU_MUL_B);
    231         inst = set_src_raddr(inst, src1);
    232         inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
    233 
    234         return inst;
    235 }
    236 
    237 uint64_t
    238 qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
    239 {
    240 	uint64_t inst = 0;
    241 	inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
    242 
    243 	inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
    244 	inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
    245                                 QPU_SMALL_IMM);
    246 
    247 	return inst;
    248 }
    249 
    250 static bool
    251 merge_fields(uint64_t *merge,
    252              uint64_t a, uint64_t b,
    253              uint64_t mask, uint64_t ignore)
    254 {
    255         if ((a & mask) == ignore) {
    256                 *merge = (*merge & ~mask) | (b & mask);
    257         } else if ((b & mask) == ignore) {
    258                 *merge = (*merge & ~mask) | (a & mask);
    259         } else {
    260                 if ((a & mask) != (b & mask))
    261                         return false;
    262         }
    263 
    264         return true;
    265 }
    266 
    267 int
    268 qpu_num_sf_accesses(uint64_t inst)
    269 {
    270         int accesses = 0;
    271         static const uint32_t specials[] = {
    272                 QPU_W_TLB_COLOR_MS,
    273                 QPU_W_TLB_COLOR_ALL,
    274                 QPU_W_TLB_Z,
    275                 QPU_W_TMU0_S,
    276                 QPU_W_TMU0_T,
    277                 QPU_W_TMU0_R,
    278                 QPU_W_TMU0_B,
    279                 QPU_W_TMU1_S,
    280                 QPU_W_TMU1_T,
    281                 QPU_W_TMU1_R,
    282                 QPU_W_TMU1_B,
    283                 QPU_W_SFU_RECIP,
    284                 QPU_W_SFU_RECIPSQRT,
    285                 QPU_W_SFU_EXP,
    286                 QPU_W_SFU_LOG,
    287         };
    288         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
    289         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
    290         uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
    291         uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
    292 
    293         for (int j = 0; j < ARRAY_SIZE(specials); j++) {
    294                 if (waddr_add == specials[j])
    295                         accesses++;
    296                 if (waddr_mul == specials[j])
    297                         accesses++;
    298         }
    299 
    300         if (raddr_a == QPU_R_MUTEX_ACQUIRE)
    301                 accesses++;
    302         if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
    303             QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
    304                 accesses++;
    305 
    306         /* XXX: semaphore, combined color read/write? */
    307         switch (QPU_GET_FIELD(inst, QPU_SIG)) {
    308         case QPU_SIG_COLOR_LOAD:
    309         case QPU_SIG_COLOR_LOAD_END:
    310         case QPU_SIG_LOAD_TMU0:
    311         case QPU_SIG_LOAD_TMU1:
    312                 accesses++;
    313         }
    314 
    315         return accesses;
    316 }
    317 
    318 static bool
    319 qpu_waddr_ignores_ws(uint32_t waddr)
    320 {
    321         switch(waddr) {
    322         case QPU_W_ACC0:
    323         case QPU_W_ACC1:
    324         case QPU_W_ACC2:
    325         case QPU_W_ACC3:
    326         case QPU_W_NOP:
    327         case QPU_W_TLB_Z:
    328         case QPU_W_TLB_COLOR_MS:
    329         case QPU_W_TLB_COLOR_ALL:
    330         case QPU_W_TLB_ALPHA_MASK:
    331         case QPU_W_VPM:
    332         case QPU_W_SFU_RECIP:
    333         case QPU_W_SFU_RECIPSQRT:
    334         case QPU_W_SFU_EXP:
    335         case QPU_W_SFU_LOG:
    336         case QPU_W_TMU0_S:
    337         case QPU_W_TMU0_T:
    338         case QPU_W_TMU0_R:
    339         case QPU_W_TMU0_B:
    340         case QPU_W_TMU1_S:
    341         case QPU_W_TMU1_T:
    342         case QPU_W_TMU1_R:
    343         case QPU_W_TMU1_B:
    344                 return true;
    345         }
    346 
    347         return false;
    348 }
    349 
    350 static void
    351 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
    352 {
    353         uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
    354         uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
    355         uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
    356 
    357         if ((*a & mux_mask) == mux_a_val) {
    358                 *a = (*a & ~mux_mask) | mux_b_val;
    359                 *merge = (*merge & ~mux_mask) | mux_b_val;
    360         }
    361 }
    362 
    363 static bool
    364 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
    365 {
    366         uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
    367         uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
    368         uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
    369         uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
    370 
    371         if (raddr_a_b != QPU_R_NOP)
    372                 return false;
    373 
    374         switch (raddr_a_a) {
    375         case QPU_R_UNIF:
    376         case QPU_R_VARY:
    377                 break;
    378         default:
    379                 return false;
    380         }
    381 
    382         if (!(*merge & QPU_PM) &&
    383             QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
    384                 return false;
    385         }
    386 
    387         if (raddr_b_b != QPU_R_NOP &&
    388             raddr_b_b != raddr_a_a)
    389                 return false;
    390 
    391         /* Move raddr A to B in instruction a. */
    392         *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
    393         *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
    394         *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
    395         *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
    396         swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
    397         swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
    398         swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
    399         swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
    400 
    401         return true;
    402 }
    403 
    404 static bool
    405 convert_mov(uint64_t *inst)
    406 {
    407         uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
    408         uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
    409         uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
    410 
    411         /* Is it a MOV? */
    412         if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
    413             (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
    414                 return false;
    415         }
    416 
    417         if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
    418                 return false;
    419 
    420         /* We could maybe support this in the .8888 and .8a-.8d cases. */
    421         if (*inst & QPU_PM)
    422                 return false;
    423 
    424         *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
    425         *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
    426 
    427         *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
    428         *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
    429         *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
    430         *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
    431 
    432         *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
    433         *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
    434 
    435         *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
    436         *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
    437 
    438         if (!qpu_waddr_ignores_ws(waddr_add))
    439                 *inst ^= QPU_WS;
    440 
    441         return true;
    442 }
    443 
    444 static bool
    445 writes_a_file(uint64_t inst)
    446 {
    447         if (!(inst & QPU_WS))
    448                 return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
    449         else
    450                 return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
    451 }
    452 
    453 static bool
    454 reads_r4(uint64_t inst)
    455 {
    456         return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
    457                 QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
    458                 QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
    459                 QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
    460 }
    461 
    462 uint64_t
    463 qpu_merge_inst(uint64_t a, uint64_t b)
    464 {
    465         uint64_t merge = a | b;
    466         bool ok = true;
    467         uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
    468         uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
    469 
    470         if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
    471             QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
    472                 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
    473                     QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
    474                     !(convert_mov(&a) || convert_mov(&b))) {
    475                         return 0;
    476                 } else {
    477                         merge = a | b;
    478                 }
    479         }
    480 
    481         if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
    482             QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
    483                 return 0;
    484 
    485         if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
    486                 return 0;
    487 
    488         if (a_sig == QPU_SIG_LOAD_IMM ||
    489             b_sig == QPU_SIG_LOAD_IMM ||
    490             a_sig == QPU_SIG_SMALL_IMM ||
    491             b_sig == QPU_SIG_SMALL_IMM ||
    492             a_sig == QPU_SIG_BRANCH ||
    493             b_sig == QPU_SIG_BRANCH) {
    494                 return 0;
    495         }
    496 
    497         ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
    498                                 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
    499 
    500         /* Misc fields that have to match exactly. */
    501         ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
    502 
    503         if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
    504                           QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
    505                 /* Since we tend to use regfile A by default both for register
    506                  * allocation and for our special values (uniforms and
    507                  * varyings), try swapping uniforms and varyings to regfile B
    508                  * to resolve raddr A conflicts.
    509                  */
    510                 if (!try_swap_ra_file(&merge, &a, &b) &&
    511                     !try_swap_ra_file(&merge, &b, &a)) {
    512                         return 0;
    513                 }
    514         }
    515 
    516         ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
    517                                 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
    518 
    519         ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
    520                                 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
    521         ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
    522                                 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
    523 
    524         /* Allow disagreement on WS (swapping A vs B physical reg file as the
    525          * destination for ADD/MUL) if one of the original instructions
    526          * ignores it (probably because it's just writing to accumulators).
    527          */
    528         if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
    529             qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
    530                 merge = (merge & ~QPU_WS) | (b & QPU_WS);
    531         } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
    532                    qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
    533                 merge = (merge & ~QPU_WS) | (a & QPU_WS);
    534         } else {
    535                 if ((a & QPU_WS) != (b & QPU_WS))
    536                         return 0;
    537         }
    538 
    539         if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
    540                 /* If one instruction has PM bit set and the other not, the
    541                  * one without PM shouldn't do packing/unpacking, and we
    542                  * have to make sure non-NOP packing/unpacking from PM
    543                  * instruction aren't added to it.
    544                  */
    545                 uint64_t temp;
    546 
    547                 /* Let a be the one with PM bit */
    548                 if (!(a & QPU_PM)) {
    549                         temp = a;
    550                         a = b;
    551                         b = temp;
    552                 }
    553 
    554                 if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
    555                         return 0;
    556 
    557                 if ((a & QPU_PACK_MASK) != 0 &&
    558                     QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
    559                         return 0;
    560 
    561                 if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
    562                         return 0;
    563         } else {
    564                 /* packing: Make sure that non-NOP packs agree, then deal with
    565                  * special-case failing of adding a non-NOP pack to something
    566                  * with a NOP pack.
    567                  */
    568                 if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
    569                         return 0;
    570                 bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
    571                                 QPU_GET_FIELD(merge, QPU_PACK));
    572                 bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
    573                                 QPU_GET_FIELD(merge, QPU_PACK));
    574                 if (!(merge & QPU_PM)) {
    575                         /* Make sure we're not going to be putting a new
    576                          * a-file packing on either half.
    577                          */
    578                         if (new_a_pack && writes_a_file(a))
    579                                 return 0;
    580 
    581                         if (new_b_pack && writes_a_file(b))
    582                                 return 0;
    583                 } else {
    584                         /* Make sure we're not going to be putting new MUL
    585                          * packing oneither half.
    586                          */
    587                         if (new_a_pack &&
    588                             QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
    589                                 return 0;
    590 
    591                         if (new_b_pack &&
    592                             QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
    593                                 return 0;
    594                 }
    595 
    596                 /* unpacking: Make sure that non-NOP unpacks agree, then deal
    597                  * with special-case failing of adding a non-NOP unpack to
    598                  * something with a NOP unpack.
    599                  */
    600                 if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
    601                         return 0;
    602                 bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
    603                                 QPU_GET_FIELD(merge, QPU_UNPACK));
    604                 bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
    605                                 QPU_GET_FIELD(merge, QPU_UNPACK));
    606                 if (!(merge & QPU_PM)) {
    607                         /* Make sure we're not going to be putting a new
    608                          * a-file packing on either half.
    609                          */
    610                         if (new_a_unpack &&
    611                             QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
    612                                 return 0;
    613 
    614                         if (new_b_unpack &&
    615                             QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
    616                                 return 0;
    617                 } else {
    618                         /* Make sure we're not going to be putting new r4
    619                          * unpack on either half.
    620                          */
    621                         if (new_a_unpack && reads_r4(a))
    622                                 return 0;
    623 
    624                         if (new_b_unpack && reads_r4(b))
    625                                 return 0;
    626                 }
    627         }
    628 
    629         if (ok)
    630                 return merge;
    631         else
    632                 return 0;
    633 }
    634 
    635 uint64_t
    636 qpu_set_sig(uint64_t inst, uint32_t sig)
    637 {
    638         assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
    639         return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
    640 }
    641 
    642 uint64_t
    643 qpu_set_cond_add(uint64_t inst, uint32_t cond)
    644 {
    645         assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
    646         return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
    647 }
    648 
    649 uint64_t
    650 qpu_set_cond_mul(uint64_t inst, uint32_t cond)
    651 {
    652         assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
    653         return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
    654 }
    655 
    656 bool
    657 qpu_waddr_is_tlb(uint32_t waddr)
    658 {
    659         switch (waddr) {
    660         case QPU_W_TLB_COLOR_ALL:
    661         case QPU_W_TLB_COLOR_MS:
    662         case QPU_W_TLB_Z:
    663                 return true;
    664         default:
    665                 return false;
    666         }
    667 }
    668 
    669 bool
    670 qpu_inst_is_tlb(uint64_t inst)
    671 {
    672         uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
    673 
    674         return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
    675                 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
    676                 sig == QPU_SIG_COLOR_LOAD ||
    677                 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
    678 }
    679 
    680 /**
    681  * Returns the small immediate value to be encoded in to the raddr b field if
    682  * the argument can be represented as one, or ~0 otherwise.
    683  */
    684 uint32_t
    685 qpu_encode_small_immediate(uint32_t i)
    686 {
    687         if (i <= 15)
    688                 return i;
    689         if ((int)i < 0 && (int)i >= -16)
    690                 return i + 32;
    691 
    692         switch (i) {
    693         case 0x3f800000:
    694                 return 32;
    695         case 0x40000000:
    696                 return 33;
    697         case 0x40800000:
    698                 return 34;
    699         case 0x41000000:
    700                 return 35;
    701         case 0x41800000:
    702                 return 36;
    703         case 0x42000000:
    704                 return 37;
    705         case 0x42800000:
    706                 return 38;
    707         case 0x43000000:
    708                 return 39;
    709         case 0x3b800000:
    710                 return 40;
    711         case 0x3c000000:
    712                 return 41;
    713         case 0x3c800000:
    714                 return 42;
    715         case 0x3d000000:
    716                 return 43;
    717         case 0x3d800000:
    718                 return 44;
    719         case 0x3e000000:
    720                 return 45;
    721         case 0x3e800000:
    722                 return 46;
    723         case 0x3f000000:
    724                 return 47;
    725         }
    726 
    727         return ~0;
    728 }
    729 
    730 void
    731 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
    732 {
    733         if (c->qpu_inst_count >= c->qpu_inst_size) {
    734                 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
    735                 c->qpu_insts = reralloc(c, c->qpu_insts,
    736                                         uint64_t, c->qpu_inst_size);
    737         }
    738         c->qpu_insts[c->qpu_inst_count++] = inst;
    739 }
    740