/*
 * Copyright 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */

#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
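
/* Illustrative sketch (not part of the original file) of how the field
 * helpers above compose: deposit an example add-opcode value, read it back,
 * and rewrite the field in place.  The opcode values are arbitrary examples.
 */
static inline uint64_t
qpu_field_helpers_example(void)
{
        uint64_t inst = 0;

        /* Deposit 56 into bits 31:24, then verify the extraction. */
        inst |= QPU_SET_FIELD(56, VC5_QPU_OP_ADD);
        assert(QPU_GET_FIELD(inst, VC5_QPU_OP_ADD) == 56);

        /* Replace just that field, leaving the rest of the word alone. */
        return QPU_UPDATE_FIELD(inst, 60, VC5_QPU_OP_ADD);
}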

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC    phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};
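
/* A minimal sketch (illustrative, not part of the original file) of what the
 * tables above encode: packed signal 3 on v3.3 means THRSW together with
 * LDUNIF.
 */
static inline bool
sig_map_example(void)
{
        const struct v3d_qpu_sig *sig = &v33_sig_map[3];

        return sig->thrsw && sig->ldunif;
}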

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are
         * reserved.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        static const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

static inline unsigned
fui(float f)
{
        union {float f; unsigned ui;} fi;
        fi.f = f;
        return fi.ui;
}

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
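
/* Sketch (illustrative, not part of the original file) of the
 * small-immediate encoding: index 16 holds -16, and the float entries start
 * at index 32, so 1.0f (0x3f800000, 2.0^0) packs to index 40.  devinfo is
 * unused by these helpers, so NULL is acceptable for this demonstration.
 */
static inline bool
small_imm_example(void)
{
        uint32_t value, packed;

        if (!v3d_qpu_small_imm_unpack(NULL, 16, &value) ||
            value != (uint32_t)-16)
                return false;

        return v3d_qpu_small_imm_pack(NULL, 0x3f800000, &packed) &&
               packed == 40;
}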

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
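
/* Sketch (illustrative, not part of the original file) of a flags round trip
 * through the two helpers above: an add-ALU condition plus a mul-ALU pushed
 * flag should pack and unpack to the same struct.  devinfo is unused by
 * these helpers, so NULL suffices here.
 */
static inline bool
flags_round_trip_example(void)
{
        struct v3d_qpu_flags cond = {
                .ac = V3D_QPU_COND_IFA,
                .mc = V3D_QPU_COND_NONE,
                .apf = V3D_QPU_PF_NONE,
                .mpf = V3D_QPU_PF_PUSHZ,
                .auf = V3D_QPU_UF_NONE,
                .muf = V3D_QPU_UF_NONE,
        };
        struct v3d_qpu_flags unpacked = { 0 };
        uint32_t packed;

        return v3d_qpu_flags_pack(NULL, &cond, &packed) &&
               v3d_qpu_flags_unpack(NULL, packed, &unpacked) &&
               memcmp(&cond, &unpacked, sizeof(cond)) == 0;
}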

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_PATCHID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
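
/* Quick illustration (not part of the original file) of the mux-mask helpers
 * used throughout the table above: MUX_MASK(0, 2) selects mux values 0..2
 * and ANYMUX accepts all eight.  Checked at compile time.
 */
static inline bool
mux_mask_example(void)
{
        STATIC_ASSERT(MUX_MASK(0, 2) == 0x7);
        STATIC_ASSERT(ANYMUX == 0xff);
        return true;
}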
*/ 546 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, 547 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, 548 { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, 549 550 { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, 551 { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, 552 { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, 553 }; 554 555 static const struct opcode_desc mul_ops[] = { 556 { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, 557 { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, 558 { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, 559 { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, 560 { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, 561 { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, 562 { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, 563 { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, 564 { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, 565 { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, 566 { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, 567 }; 568 569 static const struct opcode_desc * 570 lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, 571 uint32_t opcode, uint32_t mux_a, uint32_t mux_b) 572 { 573 for (int i = 0; i < num_opcodes; i++) { 574 const struct opcode_desc *op_desc = &opcodes[i]; 575 576 if (opcode < op_desc->opcode_first || 577 opcode > op_desc->opcode_last) 578 continue; 579 580 if (!(op_desc->mux_b_mask & (1 << mux_b))) 581 continue; 582 583 if (!(op_desc->mux_a_mask & (1 << mux_a))) 584 continue; 585 586 return op_desc; 587 } 588 589 return NULL; 590 } 591 592 static bool 593 v3d_qpu_float32_unpack_unpack(uint32_t packed, 594 enum v3d_qpu_input_unpack *unpacked) 595 { 596 switch (packed) { 597 case 0: 598 *unpacked = V3D_QPU_UNPACK_ABS; 599 return true; 600 case 1: 601 *unpacked = V3D_QPU_UNPACK_NONE; 602 return true; 603 case 2: 604 *unpacked = V3D_QPU_UNPACK_L; 605 return true; 606 case 3: 607 *unpacked = V3D_QPU_UNPACK_H; 608 return true; 609 default: 610 return false; 611 } 612 } 613 614 static bool 615 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, 616 uint32_t *packed) 617 { 618 switch (unpacked) { 619 case V3D_QPU_UNPACK_ABS: 620 *packed = 0; 621 return true; 622 case V3D_QPU_UNPACK_NONE: 623 *packed = 1; 624 return true; 625 case V3D_QPU_UNPACK_L: 626 *packed = 2; 627 return true; 628 case V3D_QPU_UNPACK_H: 629 *packed = 3; 630 return true; 631 default: 632 return false; 633 } 634 } 635 636 static bool 637 v3d_qpu_float16_unpack_unpack(uint32_t packed, 638 enum v3d_qpu_input_unpack *unpacked) 639 { 640 switch (packed) { 641 case 0: 642 *unpacked = V3D_QPU_UNPACK_NONE; 643 return true; 644 case 1: 645 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; 646 return true; 647 case 2: 648 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; 649 return true; 650 case 3: 651 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; 652 return true; 653 case 4: 654 *unpacked = V3D_QPU_UNPACK_SWAP_16; 655 return true; 656 default: 657 return false; 658 } 659 } 660 661 static bool 662 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, 663 uint32_t *packed) 664 { 665 switch (unpacked) { 666 case V3D_QPU_UNPACK_NONE: 667 *packed = 0; 668 return true; 669 case V3D_QPU_UNPACK_REPLICATE_32F_16: 670 *packed = 1; 671 return true; 672 case V3D_QPU_UNPACK_REPLICATE_L_16: 673 *packed = 2; 674 return true; 675 case V3D_QPU_UNPACK_REPLICATE_H_16: 676 *packed = 3; 677 return true; 678 case V3D_QPU_UNPACK_SWAP_16: 679 *packed = 4; 680 return true; 681 default: 682 return false; 683 } 684 } 685 686 static bool 687 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked, 688 uint32_t *packed) 689 { 

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
                instr->alu.add.output_pack = (op >> 4) & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & VC5_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
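
/* Standalone sketch (illustrative, not part of the original file) of the
 * operand-ordering test above: each operand forms a small key with its 2-bit
 * unpack field above the 3-bit mux value, and the a-key/b-key comparison
 * selects FADD vs. FADDNF (or FMIN vs. FMAX).
 */
static inline bool
add_operand_order_example(uint32_t a_unpack, uint32_t mux_a,
                          uint32_t b_unpack, uint32_t mux_b)
{
        return a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
}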

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo,
                                                           packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               VC5_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);

        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
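
/* Sketch (illustrative, not part of the original file) of the expected round
 * trip through the public entry points: unpack a raw 64-bit instruction and
 * repack it.  For canonically encoded instructions the original bits should
 * come back; commutative add ops may legitimately repack differently.
 */
static inline bool
instr_round_trip_example(const struct v3d_device_info *devinfo, uint64_t raw)
{
        struct v3d_qpu_instr instr;
        uint64_t repacked;

        return v3d_qpu_instr_unpack(devinfo, raw, &instr) &&
               v3d_qpu_instr_pack(devinfo, &instr, &repacked) &&
               repacked == raw;
}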