Home | History | Annotate | Download | only in qpu
      1 /*
      2  * Copyright  2016 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <string.h>
     25 #include "util/macros.h"
     26 
     27 #include "broadcom/common/v3d_device_info.h"
     28 #include "qpu_instr.h"
     29 
#ifndef QPU_MASK
/* Builds a 64-bit mask covering instruction bits [high:low], inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts a value into a field's position, asserting it fits in the field. */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts a field from a 64-bit instruction word as a uint32_t. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Clears a field in an instruction word and writes a new value into it. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
     45 
     46 #define VC5_QPU_OP_MUL_SHIFT                58
     47 #define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)
     48 
     49 #define VC5_QPU_SIG_SHIFT                   53
     50 #define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)
     51 
     52 #define VC5_QPU_COND_SHIFT                  46
     53 #define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
     54 #define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)
     55 
     56 #define VC5_QPU_MM                          QPU_MASK(45, 45)
     57 #define VC5_QPU_MA                          QPU_MASK(44, 44)
     58 
     59 #define V3D_QPU_WADDR_M_SHIFT               38
     60 #define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)
     61 
     62 #define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
     63 #define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)
     64 
     65 #define V3D_QPU_WADDR_A_SHIFT               32
     66 #define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)
     67 
     68 #define VC5_QPU_BRANCH_COND_SHIFT           32
     69 #define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)
     70 
     71 #define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
     72 #define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)
     73 
     74 #define VC5_QPU_OP_ADD_SHIFT                24
     75 #define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)
     76 
     77 #define VC5_QPU_MUL_B_SHIFT                 21
     78 #define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)
     79 
     80 #define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
     81 #define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)
     82 
     83 #define VC5_QPU_MUL_A_SHIFT                 18
     84 #define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)
     85 
     86 #define VC5_QPU_ADD_B_SHIFT                 15
     87 #define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)
     88 
     89 #define VC5_QPU_BRANCH_BDU_SHIFT            15
     90 #define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)
     91 
     92 #define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)
     93 
     94 #define VC5_QPU_ADD_A_SHIFT                 12
     95 #define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)
     96 
     97 #define VC5_QPU_BRANCH_BDI_SHIFT            12
     98 #define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)
     99 
    100 #define VC5_QPU_RADDR_A_SHIFT               6
    101 #define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)
    102 
    103 #define VC5_QPU_RADDR_B_SHIFT               0
    104 #define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
    105 
/* Shorthand designated-initializer macros used to build the v3d_qpu_sig
 * signal tables below: each one sets a single flag in struct v3d_qpu_sig.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
    120 
/* V3D 3.3 signal decode table: maps the 5-bit packed signal field to the
 * set of signal flags it asserts.  Indices with no initializer (18-21) are
 * reserved encodings and unpack to the all-clear struct.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
    153 
/* V3D 4.0 signal decode table.  Compared to 3.3: the LDVPM combinations
 * are gone, WRTMUC encodings appear at 18-21, and 31 becomes SMIMM+LDTMU.
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
    182 
/* V3D 4.1 signal decode table.  Adds the LDUNIFRF/LDUNIFA/LDUNIFARF
 * register-file uniform signals on top of the 4.0 encodings.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        /* 26-30 reserved */
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        [31] = { SMIMM,            LDTMU, },
};
    214 
    215 bool
    216 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
    217                    uint32_t packed_sig,
    218                    struct v3d_qpu_sig *sig)
    219 {
    220         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
    221                 return false;
    222 
    223         if (devinfo->ver >= 41)
    224                 *sig = v41_sig_map[packed_sig];
    225         else if (devinfo->ver == 40)
    226                 *sig = v40_sig_map[packed_sig];
    227         else
    228                 *sig = v33_sig_map[packed_sig];
    229 
    230         /* Signals with zeroed unpacked contents after element 0 are reserved. */
    231         return (packed_sig == 0 ||
    232                 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
    233 }
    234 
    235 bool
    236 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
    237                  const struct v3d_qpu_sig *sig,
    238                  uint32_t *packed_sig)
    239 {
    240         static const struct v3d_qpu_sig *map;
    241 
    242         if (devinfo->ver >= 41)
    243                 map = v41_sig_map;
    244         else if (devinfo->ver == 40)
    245                 map = v40_sig_map;
    246         else
    247                 map = v33_sig_map;
    248 
    249         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
    250                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
    251                         *packed_sig = i;
    252                         return true;
    253                 }
    254         }
    255 
    256         return false;
    257 }
    258 static inline unsigned
    259 fui( float f )
    260 {
    261         union {float f; unsigned ui;} fi;
    262    fi.f = f;
    263    return fi.ui;
    264 }
    265 
/* Small-immediate decode table, indexed by the packed 6-bit encoding:
 * integers 0..15, then -16..-1 (stored as their two's-complement uint32_t
 * bit patterns), then the IEEE-754 bit patterns for powers of two from
 * 2^-8 through 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
    292 
    293 bool
    294 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
    295                          uint32_t packed_small_immediate,
    296                          uint32_t *small_immediate)
    297 {
    298         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
    299                 return false;
    300 
    301         *small_immediate = small_immediates[packed_small_immediate];
    302         return true;
    303 }
    304 
    305 bool
    306 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
    307                        uint32_t value,
    308                        uint32_t *packed_small_immediate)
    309 {
    310         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
    311 
    312         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
    313                 if (small_immediates[i] == value) {
    314                         *packed_small_immediate = i;
    315                         return true;
    316                 }
    317         }
    318 
    319         return false;
    320 }
    321 
/* Decodes the 7-bit packed condition field into ALU add/mul condition,
 * pushed-flag, and update-flag settings.  Returns false only for the
 * reserved encoding 0x10; all other encodings decode successfully.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* 2-bit condition sub-field decode, shared by add and mul conds. */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "no conditions, no flag writes" and fill in below. */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* 0x00: nothing set. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0x01-0x03: add push flag. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0x04-0x0f: add update flag, biased so 4 -> UF_ANDZ. */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* Reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul push flag. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul update flag, same bias as auf above. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20-0x2f: add condition (bits 3:2) + mul push flag. */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30-0x3f: mul condition (bits 3:2) + add push flag. */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40-0x7f: mul condition in bits 5:4, plus either an add
                 * condition (low nibble 0-3) or an add update flag.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
    370 
/* Encodes add/mul condition and flag settings into the packed 7-bit cond
 * field — the inverse of v3d_qpu_flags_unpack().  Returns false when the
 * combination of present fields has no hardware encoding.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Each legal combination of present fields maps to fixed high bits
         * of the encoding; the variable low bits are filled in below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Build the bitmask of which fields are actually set. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* Push flags occupy the low bits directly (NONE == 0, so
                 * OR-ing an absent field is harmless).
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Update flags are biased: UF_ANDZ encodes as 4. */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                /* The mul condition lands at bits 5:4 in the bit-6
                 * encodings (MC|AC, MC|AUF), otherwise at bits 3:2.
                 */
                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
    444 
    445 /* Make a mapping of the table of opcodes in the spec.  The opcode is
    446  * determined by a combination of the opcode field, and in the case of 0 or
    447  * 1-arg opcodes, the mux_b field as well.
    448  */
/* Bitmask of input mux values in [bot, top] inclusive; ANYMUX matches all
 * eight mux encodings.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
    451 
/* One row of the add/mul opcode decode tables below. */
struct opcode_desc {
        uint8_t opcode_first; /* first raw opcode value covered (inclusive) */
        uint8_t opcode_last;  /* last raw opcode value covered (inclusive) */
        uint8_t mux_b_mask;   /* bitmask of mux_b values this row matches */
        uint8_t mux_a_mask;   /* bitmask of mux_a values this row matches */
        uint8_t op;           /* decoded V3D_QPU_A_* / V3D_QPU_M_* opcode */
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
    461 
/* Decode table for the add-ALU opcode field.  Rows are matched by
 * lookup_opcode(); for 0/1-argument opcodes the mux_a/mux_b masks act as
 * extra opcode bits.  Opcodes 249-251 and 253-255 are remapped onto the
 * 245-247 rows by the caller before lookup (unpack-flag replicas).
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: single-operand ops selected by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: zero-operand ops selected by mux_b then mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_PATCHID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
    554 
/* Decode table for the mul-ALU opcode field (bits 63:58), matched by
 * lookup_opcode() in the same way as add_ops.
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        /* Opcode 15: distinguished further by the mux fields. */
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
    568 
    569 static const struct opcode_desc *
    570 lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
    571               uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
    572 {
    573         for (int i = 0; i < num_opcodes; i++) {
    574                 const struct opcode_desc *op_desc = &opcodes[i];
    575 
    576                 if (opcode < op_desc->opcode_first ||
    577                     opcode > op_desc->opcode_last)
    578                         continue;
    579 
    580                 if (!(op_desc->mux_b_mask & (1 << mux_b)))
    581                         continue;
    582 
    583                 if (!(op_desc->mux_a_mask & (1 << mux_a)))
    584                         continue;
    585 
    586                 return op_desc;
    587         }
    588 
    589         return NULL;
    590 }
    591 
    592 static bool
    593 v3d_qpu_float32_unpack_unpack(uint32_t packed,
    594                               enum v3d_qpu_input_unpack *unpacked)
    595 {
    596         switch (packed) {
    597         case 0:
    598                 *unpacked = V3D_QPU_UNPACK_ABS;
    599                 return true;
    600         case 1:
    601                 *unpacked = V3D_QPU_UNPACK_NONE;
    602                 return true;
    603         case 2:
    604                 *unpacked = V3D_QPU_UNPACK_L;
    605                 return true;
    606         case 3:
    607                 *unpacked = V3D_QPU_UNPACK_H;
    608                 return true;
    609         default:
    610                 return false;
    611         }
    612 }
    613 
    614 static bool
    615 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
    616                             uint32_t *packed)
    617 {
    618         switch (unpacked) {
    619         case V3D_QPU_UNPACK_ABS:
    620                 *packed = 0;
    621                 return true;
    622         case V3D_QPU_UNPACK_NONE:
    623                 *packed = 1;
    624                 return true;
    625         case V3D_QPU_UNPACK_L:
    626                 *packed = 2;
    627                 return true;
    628         case V3D_QPU_UNPACK_H:
    629                 *packed = 3;
    630                 return true;
    631         default:
    632                 return false;
    633         }
    634 }
    635 
    636 static bool
    637 v3d_qpu_float16_unpack_unpack(uint32_t packed,
    638                               enum v3d_qpu_input_unpack *unpacked)
    639 {
    640         switch (packed) {
    641         case 0:
    642                 *unpacked = V3D_QPU_UNPACK_NONE;
    643                 return true;
    644         case 1:
    645                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
    646                 return true;
    647         case 2:
    648                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
    649                 return true;
    650         case 3:
    651                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
    652                 return true;
    653         case 4:
    654                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
    655                 return true;
    656         default:
    657                 return false;
    658         }
    659 }
    660 
    661 static bool
    662 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
    663                             uint32_t *packed)
    664 {
    665         switch (unpacked) {
    666         case V3D_QPU_UNPACK_NONE:
    667                 *packed = 0;
    668                 return true;
    669         case V3D_QPU_UNPACK_REPLICATE_32F_16:
    670                 *packed = 1;
    671                 return true;
    672         case V3D_QPU_UNPACK_REPLICATE_L_16:
    673                 *packed = 2;
    674                 return true;
    675         case V3D_QPU_UNPACK_REPLICATE_H_16:
    676                 *packed = 3;
    677                 return true;
    678         case V3D_QPU_UNPACK_SWAP_16:
    679                 *packed = 4;
    680                 return true;
    681         default:
    682                 return false;
    683         }
    684 }
    685 
/* Encodes a float32 output-pack mode into its 2-bit field; false for
 * unencodable modes.
 *
 * NOTE(review): the parameter is declared as enum v3d_qpu_input_unpack,
 * but the cases are V3D_QPU_PACK_* (output-pack) values — the declared
 * enum type looks wrong; confirm against qpu_instr.h.
 */
static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
    704 
/*
 * Decodes the add-ALU half of a packed 64-bit instruction into
 * instr->alu.add.
 *
 * Returns false if the opcode/mux combination doesn't match any known add
 * op, or if a pack/unpack bitfield holds a rejected encoding.
 */
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        /* Look up using the canonicalized opcode; the raw op still carries
         * the pack/unpack bits decoded below.
         */
        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The STVPM* variants share an opcode and are told apart by
                 * the write address field.
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Per-op decode of the output pack and input unpack modes. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
                /* f32 binops: pack in op bits 5:4, unpacks in 3:2 (a) and
                 * 1:0 (b).
                 */
                instr->alu.add.output_pack = (op >> 4) & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* f32 unary ops: the output pack rides in the low mux_b
                 * bits instead of the opcode.
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* float-to-int conversions never pack their output. */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* f16 vector ops: a 3-bit f16 unpack in the low opcode
                 * bits; no output pack or b unpack.
                 */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        /* The MA bit selects the _OUT variants of the LDVPM* ops; for
         * everything else it means a magic register write.
         */
        instr->alu.add.magic_write = false;
        if (packed_inst & VC5_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
    854 
/*
 * Decodes the mul-ALU half of a packed 64-bit instruction into
 * instr->alu.mul.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        /* Per-op decode of the output pack and input unpack modes. */
        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Pack lives in op bits 5:4 biased by 1 (the table's
                 * opcode_first has a 1 in that field -- see the pack side).
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* Output pack is split: high bit from op bit 0, low bit
                 * from mux_b bit 2; the unpack sits in mux_b bits 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* The f16 unpack mode is stored biased by 4 (mod 8) in the
                 * low opcode bits (see the pack side).
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}
    926 
/*
 * Packs the add-ALU half of *instr into *packed_instr: mux a/b, opcode,
 * write address and the MA (magic write) bit.
 *
 * Returns false when the op, or one of its pack/unpack modes, has no
 * hardware encoding.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        /* Linear search of the opcode table for this add op. */
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                /* The STVPM* variants share an opcode and are distinguished
                 * by the write address, so it can't be a magic register.
                 */
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                /* The _OUT variants are the _IN opcodes with the MA bit set
                 * (mirrors v3d_qpu_add_unpack).
                 */
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        /* Per-op encode of the output pack and input unpack modes. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* The output pack rides in the low mux_b bits for these
                 * unary ops.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* An a_unpack that packs to 0 has no encoding for these
                 * ops.
                 */
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                /* Reuses "packed" declared in the FTOIN case above; its
                 * scope covers the rest of the switch body.
                 */
                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Ops without pack/unpack support must have them all at
                 * their NONE defaults (NOP is exempt).
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}
   1117 
/*
 * Packs the mul-ALU half of *instr into *packed_instr: mux a/b, opcode,
 * write address and the MM (magic write) bit.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        /* Linear search of the opcode table for this mul op. */
        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        /* Per-op encode of the output pack and input unpack modes. */
        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* Output pack is split: high bit into opcode bit 0, low bit
                 * into mux_b bit 2 (mirrors v3d_qpu_mul_unpack).
                 */
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* The f16 unpack is stored biased by 4 (mod 8); SWAP_16
                 * gets its own opcode.
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}
   1226 
   1227 static bool
   1228 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
   1229                          uint64_t packed_instr,
   1230                          struct v3d_qpu_instr *instr)
   1231 {
   1232         instr->type = V3D_QPU_INSTR_TYPE_ALU;
   1233 
   1234         if (!v3d_qpu_sig_unpack(devinfo,
   1235                                 QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
   1236                                 &instr->sig))
   1237                 return false;
   1238 
   1239         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
   1240         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
   1241                 instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
   1242                 instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
   1243 
   1244                 instr->flags.ac = V3D_QPU_COND_NONE;
   1245                 instr->flags.mc = V3D_QPU_COND_NONE;
   1246                 instr->flags.apf = V3D_QPU_PF_NONE;
   1247                 instr->flags.mpf = V3D_QPU_PF_NONE;
   1248                 instr->flags.auf = V3D_QPU_UF_NONE;
   1249                 instr->flags.muf = V3D_QPU_UF_NONE;
   1250         } else {
   1251                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
   1252                         return false;
   1253         }
   1254 
   1255         instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
   1256         instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
   1257 
   1258         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
   1259                 return false;
   1260 
   1261         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
   1262                 return false;
   1263 
   1264         return true;
   1265 }
   1266 
/*
 * Decodes a packed branch instruction into *instr.
 */
static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        /* cond 0 is "always"; values from 2 up map onto A0..ALLNA.
         * NOTE(review): cond == 1 is only rejected via the unsigned
         * wraparound of (cond - 2) -- confirm it can't alias a valid
         * condition value.
         */
        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        /* msfign value 3 is rejected as invalid. */
        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        /* bdu is only decoded when the UB bit is set. */
        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        /* The branch offset is split across two fields: bits 23:3 in
         * ADDR_LOW and 31:24 in ADDR_HIGH.
         */
        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}
   1311 
   1312 bool
   1313 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
   1314                      uint64_t packed_instr,
   1315                      struct v3d_qpu_instr *instr)
   1316 {
   1317         if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
   1318                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
   1319         } else {
   1320                 uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
   1321 
   1322                 if ((sig & 24) == 16) {
   1323                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
   1324                                                            instr);
   1325                 } else {
   1326                         return false;
   1327                 }
   1328         }
   1329 }
   1330 
/*
 * Packs an ALU instruction: signal, read addresses, both ALU halves, and
 * the condition field (which doubles as the signal write address when the
 * signal writes a register).
 */
static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        /* The cond field carries the signal write address,
                         * so no conditions or flag updates may be encoded.
                         */
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                /* Non-ALU instruction types packed through this path can't
                 * carry a signal write address.
                 */
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}
   1377 
/*
 * Packs a branch instruction.  Branches are encoded with signal 16 and
 * carry their condition, destination kind (bdi/bdu) and offset in the
 * fields packed below.
 */
static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        /* COND_ALWAYS is encoded as 0 (the field's default); other
         * conditions map to 2 + (cond - A0), mirroring the unpack side.
         */
        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                /* msfign was already ORed in above; repeating the same
                 * value here is redundant but harmless.
                 */
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               VC5_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);

                /* NOTE(review): no break here, so ABS/REL falls through and
                 * also packs raddr_a below.  Confirm whether a break is
                 * missing or the fallthrough is intentional.
                 */
        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}
   1427 
   1428 bool
   1429 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
   1430                    const struct v3d_qpu_instr *instr,
   1431                    uint64_t *packed_instr)
   1432 {
   1433         *packed_instr = 0;
   1434 
   1435         switch (instr->type) {
   1436         case V3D_QPU_INSTR_TYPE_ALU:
   1437                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
   1438         case V3D_QPU_INSTR_TYPE_BRANCH:
   1439                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
   1440         default:
   1441                 return false;
   1442         }
   1443 }
   1444