1 /* 2 * Copyright 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <stdbool.h> 25 #include "util/ralloc.h" 26 #include "vc4_qir.h" 27 #include "vc4_qpu.h" 28 29 #define QPU_MUX(mux, muxfield) \ 30 QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? mux : QPU_MUX_B, muxfield) 31 32 static uint64_t 33 set_src_raddr(uint64_t inst, struct qpu_reg src) 34 { 35 if (src.mux == QPU_MUX_A) { 36 assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP || 37 QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr); 38 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A); 39 } 40 41 if (src.mux == QPU_MUX_B) { 42 assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || 43 QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) && 44 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM); 45 return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B); 46 } 47 48 if (src.mux == QPU_MUX_SMALL_IMM) { 49 if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { 50 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); 51 } else { 52 inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM); 53 assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP); 54 } 55 return ((inst & ~QPU_RADDR_B_MASK) | 56 QPU_SET_FIELD(src.addr, QPU_RADDR_B)); 57 } 58 59 return inst; 60 } 61 62 uint64_t 63 qpu_NOP() 64 { 65 uint64_t inst = 0; 66 67 inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD); 68 inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL); 69 70 /* Note: These field values are actually non-zero */ 71 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 72 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 73 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 74 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 75 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 76 77 return inst; 78 } 79 80 static uint64_t 81 qpu_a_dst(struct qpu_reg dst) 82 { 83 uint64_t inst = 0; 84 85 if (dst.mux <= QPU_MUX_R5) { 86 /* Translate the mux to the ACCn values. */ 87 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD); 88 } else { 89 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD); 90 if (dst.mux == QPU_MUX_B) 91 inst |= QPU_WS; 92 } 93 94 return inst; 95 } 96 97 static uint64_t 98 qpu_m_dst(struct qpu_reg dst) 99 { 100 uint64_t inst = 0; 101 102 if (dst.mux <= QPU_MUX_R5) { 103 /* Translate the mux to the ACCn values. */ 104 inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL); 105 } else { 106 inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL); 107 if (dst.mux == QPU_MUX_A) 108 inst |= QPU_WS; 109 } 110 111 return inst; 112 } 113 114 uint64_t 115 qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) 116 { 117 uint64_t inst = 0; 118 119 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 120 inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); 121 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 122 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 123 inst |= qpu_a_dst(dst); 124 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 125 inst |= QPU_MUX(src.mux, QPU_ADD_A); 126 inst |= QPU_MUX(src.mux, QPU_ADD_B); 127 inst = set_src_raddr(inst, src); 128 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 129 130 return inst; 131 } 132 133 uint64_t 134 qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) 135 { 136 uint64_t inst = 0; 137 138 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 139 inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); 140 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 141 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 142 inst |= qpu_m_dst(dst); 143 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 144 inst |= QPU_MUX(src.mux, QPU_MUL_A); 145 inst |= QPU_MUX(src.mux, QPU_MUL_B); 146 inst = set_src_raddr(inst, src); 147 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 148 149 return inst; 150 } 151 152 uint64_t 153 qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) 154 { 155 uint64_t inst = 0; 156 157 inst |= qpu_a_dst(dst); 158 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 159 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 160 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 161 inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG); 162 inst |= val; 163 164 return inst; 165 } 166 167 uint64_t 168 qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) 169 { 170 return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2, 171 QPU_LOAD_IMM_MODE); 172 } 173 174 uint64_t 175 qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) 176 { 177 return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2, 178 QPU_LOAD_IMM_MODE); 179 } 180 181 uint64_t 182 qpu_branch(uint32_t cond, uint32_t target) 183 { 184 uint64_t inst = 0; 185 186 inst |= qpu_a_dst(qpu_ra(QPU_W_NOP)); 187 inst |= qpu_m_dst(qpu_rb(QPU_W_NOP)); 188 inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND); 189 inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG); 190 inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET); 191 192 return inst; 193 } 194 195 uint64_t 196 qpu_a_alu2(enum qpu_op_add op, 197 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 198 { 199 uint64_t inst = 0; 200 201 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 202 inst |= QPU_SET_FIELD(op, QPU_OP_ADD); 203 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 204 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 205 inst |= qpu_a_dst(dst); 206 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 207 inst |= QPU_MUX(src0.mux, QPU_ADD_A); 208 inst = set_src_raddr(inst, src0); 209 inst |= QPU_MUX(src1.mux, QPU_ADD_B); 210 inst = set_src_raddr(inst, src1); 211 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 212 213 return inst; 214 } 215 216 uint64_t 217 qpu_m_alu2(enum qpu_op_mul op, 218 struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 219 { 220 uint64_t inst = 0; 221 222 inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 223 inst |= QPU_SET_FIELD(op, QPU_OP_MUL); 224 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 225 inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 226 inst |= qpu_m_dst(dst); 227 inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 228 inst |= QPU_MUX(src0.mux, QPU_MUL_A); 229 inst = set_src_raddr(inst, src0); 230 inst |= QPU_MUX(src1.mux, QPU_MUL_B); 231 inst = set_src_raddr(inst, src1); 232 inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 233 234 return inst; 235 } 236 237 uint64_t 238 qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot) 239 { 240 uint64_t inst = 0; 241 inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0); 242 243 inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG); 244 inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot, 245 QPU_SMALL_IMM); 246 247 return inst; 248 } 249 250 static bool 251 merge_fields(uint64_t *merge, 252 uint64_t a, uint64_t b, 253 uint64_t mask, uint64_t ignore) 254 { 255 if ((a & mask) == ignore) { 256 *merge = (*merge & ~mask) | (b & mask); 257 } else if ((b & mask) == ignore) { 258 *merge = (*merge & ~mask) | (a & mask); 259 } else { 260 if ((a & mask) != (b & mask)) 261 return false; 262 } 263 264 return true; 265 } 266 267 int 268 qpu_num_sf_accesses(uint64_t inst) 269 { 270 int accesses = 0; 271 static const uint32_t specials[] = { 272 QPU_W_TLB_COLOR_MS, 273 QPU_W_TLB_COLOR_ALL, 274 QPU_W_TLB_Z, 275 QPU_W_TMU0_S, 276 QPU_W_TMU0_T, 277 QPU_W_TMU0_R, 278 QPU_W_TMU0_B, 279 QPU_W_TMU1_S, 280 QPU_W_TMU1_T, 281 QPU_W_TMU1_R, 282 QPU_W_TMU1_B, 283 QPU_W_SFU_RECIP, 284 QPU_W_SFU_RECIPSQRT, 285 QPU_W_SFU_EXP, 286 QPU_W_SFU_LOG, 287 }; 288 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 289 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 290 uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 291 uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 292 293 for (int j = 0; j < ARRAY_SIZE(specials); j++) { 294 if (waddr_add == specials[j]) 295 accesses++; 296 if (waddr_mul == specials[j]) 297 accesses++; 298 } 299 300 if (raddr_a == QPU_R_MUTEX_ACQUIRE) 301 accesses++; 302 if (raddr_b == QPU_R_MUTEX_ACQUIRE && 303 QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) 304 accesses++; 305 306 /* XXX: semaphore, combined color read/write? */ 307 switch (QPU_GET_FIELD(inst, QPU_SIG)) { 308 case QPU_SIG_COLOR_LOAD: 309 case QPU_SIG_COLOR_LOAD_END: 310 case QPU_SIG_LOAD_TMU0: 311 case QPU_SIG_LOAD_TMU1: 312 accesses++; 313 } 314 315 return accesses; 316 } 317 318 static bool 319 qpu_waddr_ignores_ws(uint32_t waddr) 320 { 321 switch(waddr) { 322 case QPU_W_ACC0: 323 case QPU_W_ACC1: 324 case QPU_W_ACC2: 325 case QPU_W_ACC3: 326 case QPU_W_NOP: 327 case QPU_W_TLB_Z: 328 case QPU_W_TLB_COLOR_MS: 329 case QPU_W_TLB_COLOR_ALL: 330 case QPU_W_TLB_ALPHA_MASK: 331 case QPU_W_VPM: 332 case QPU_W_SFU_RECIP: 333 case QPU_W_SFU_RECIPSQRT: 334 case QPU_W_SFU_EXP: 335 case QPU_W_SFU_LOG: 336 case QPU_W_TMU0_S: 337 case QPU_W_TMU0_T: 338 case QPU_W_TMU0_R: 339 case QPU_W_TMU0_B: 340 case QPU_W_TMU1_S: 341 case QPU_W_TMU1_T: 342 case QPU_W_TMU1_R: 343 case QPU_W_TMU1_B: 344 return true; 345 } 346 347 return false; 348 } 349 350 static void 351 swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift) 352 { 353 uint64_t mux_mask = (uint64_t)0x7 << mux_shift; 354 uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift; 355 uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift; 356 357 if ((*a & mux_mask) == mux_a_val) { 358 *a = (*a & ~mux_mask) | mux_b_val; 359 *merge = (*merge & ~mux_mask) | mux_b_val; 360 } 361 } 362 363 static bool 364 try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) 365 { 366 uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A); 367 uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B); 368 uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A); 369 uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B); 370 371 if (raddr_a_b != QPU_R_NOP) 372 return false; 373 374 switch (raddr_a_a) { 375 case QPU_R_UNIF: 376 case QPU_R_VARY: 377 break; 378 default: 379 return false; 380 } 381 382 if (!(*merge & QPU_PM) && 383 QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) { 384 return false; 385 } 386 387 if (raddr_b_b != QPU_R_NOP && 388 raddr_b_b != raddr_a_a) 389 return false; 390 391 /* Move raddr A to B in instruction a. */ 392 *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 393 *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B); 394 *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A); 395 *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B); 396 swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT); 397 swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT); 398 swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT); 399 swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT); 400 401 return true; 402 } 403 404 static bool 405 convert_mov(uint64_t *inst) 406 { 407 uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A); 408 uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD); 409 uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD); 410 411 /* Is it a MOV? */ 412 if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR || 413 (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) { 414 return false; 415 } 416 417 if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE) 418 return false; 419 420 /* We could maybe support this in the .8888 and .8a-.8d cases. */ 421 if (*inst & QPU_PM) 422 return false; 423 424 *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD); 425 *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL); 426 427 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A); 428 *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B); 429 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A); 430 *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B); 431 432 *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL); 433 *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD); 434 435 *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL); 436 *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD); 437 438 if (!qpu_waddr_ignores_ws(waddr_add)) 439 *inst ^= QPU_WS; 440 441 return true; 442 } 443 444 static bool 445 writes_a_file(uint64_t inst) 446 { 447 if (!(inst & QPU_WS)) 448 return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; 449 else 450 return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32; 451 } 452 453 static bool 454 reads_r4(uint64_t inst) 455 { 456 return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || 457 QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || 458 QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || 459 QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); 460 } 461 462 uint64_t 463 qpu_merge_inst(uint64_t a, uint64_t b) 464 { 465 uint64_t merge = a | b; 466 bool ok = true; 467 uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG); 468 uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG); 469 470 if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP && 471 QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) { 472 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP || 473 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP || 474 !(convert_mov(&a) || convert_mov(&b))) { 475 return 0; 476 } else { 477 merge = a | b; 478 } 479 } 480 481 if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP && 482 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 483 return 0; 484 485 if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) 486 return 0; 487 488 if (a_sig == QPU_SIG_LOAD_IMM || 489 b_sig == QPU_SIG_LOAD_IMM || 490 a_sig == QPU_SIG_SMALL_IMM || 491 b_sig == QPU_SIG_SMALL_IMM || 492 a_sig == QPU_SIG_BRANCH || 493 b_sig == QPU_SIG_BRANCH) { 494 return 0; 495 } 496 497 ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, 498 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); 499 500 /* Misc fields that have to match exactly. */ 501 ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0); 502 503 if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK, 504 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { 505 /* Since we tend to use regfile A by default both for register 506 * allocation and for our special values (uniforms and 507 * varyings), try swapping uniforms and varyings to regfile B 508 * to resolve raddr A conflicts. 509 */ 510 if (!try_swap_ra_file(&merge, &a, &b) && 511 !try_swap_ra_file(&merge, &b, &a)) { 512 return 0; 513 } 514 } 515 516 ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK, 517 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); 518 519 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK, 520 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD)); 521 ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK, 522 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); 523 524 /* Allow disagreement on WS (swapping A vs B physical reg file as the 525 * destination for ADD/MUL) if one of the original instructions 526 * ignores it (probably because it's just writing to accumulators). 527 */ 528 if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) && 529 qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) { 530 merge = (merge & ~QPU_WS) | (b & QPU_WS); 531 } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) && 532 qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) { 533 merge = (merge & ~QPU_WS) | (a & QPU_WS); 534 } else { 535 if ((a & QPU_WS) != (b & QPU_WS)) 536 return 0; 537 } 538 539 if (!merge_fields(&merge, a, b, QPU_PM, ~0)) { 540 /* If one instruction has PM bit set and the other not, the 541 * one without PM shouldn't do packing/unpacking, and we 542 * have to make sure non-NOP packing/unpacking from PM 543 * instruction aren't added to it. 544 */ 545 uint64_t temp; 546 547 /* Let a be the one with PM bit */ 548 if (!(a & QPU_PM)) { 549 temp = a; 550 a = b; 551 b = temp; 552 } 553 554 if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0) 555 return 0; 556 557 if ((a & QPU_PACK_MASK) != 0 && 558 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 559 return 0; 560 561 if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b)) 562 return 0; 563 } else { 564 /* packing: Make sure that non-NOP packs agree, then deal with 565 * special-case failing of adding a non-NOP pack to something 566 * with a NOP pack. 567 */ 568 if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0)) 569 return 0; 570 bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) != 571 QPU_GET_FIELD(merge, QPU_PACK)); 572 bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) != 573 QPU_GET_FIELD(merge, QPU_PACK)); 574 if (!(merge & QPU_PM)) { 575 /* Make sure we're not going to be putting a new 576 * a-file packing on either half. 577 */ 578 if (new_a_pack && writes_a_file(a)) 579 return 0; 580 581 if (new_b_pack && writes_a_file(b)) 582 return 0; 583 } else { 584 /* Make sure we're not going to be putting new MUL 585 * packing oneither half. 586 */ 587 if (new_a_pack && 588 QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP) 589 return 0; 590 591 if (new_b_pack && 592 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 593 return 0; 594 } 595 596 /* unpacking: Make sure that non-NOP unpacks agree, then deal 597 * with special-case failing of adding a non-NOP unpack to 598 * something with a NOP unpack. 599 */ 600 if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0)) 601 return 0; 602 bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) != 603 QPU_GET_FIELD(merge, QPU_UNPACK)); 604 bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) != 605 QPU_GET_FIELD(merge, QPU_UNPACK)); 606 if (!(merge & QPU_PM)) { 607 /* Make sure we're not going to be putting a new 608 * a-file packing on either half. 609 */ 610 if (new_a_unpack && 611 QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP) 612 return 0; 613 614 if (new_b_unpack && 615 QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP) 616 return 0; 617 } else { 618 /* Make sure we're not going to be putting new r4 619 * unpack on either half. 620 */ 621 if (new_a_unpack && reads_r4(a)) 622 return 0; 623 624 if (new_b_unpack && reads_r4(b)) 625 return 0; 626 } 627 } 628 629 if (ok) 630 return merge; 631 else 632 return 0; 633 } 634 635 uint64_t 636 qpu_set_sig(uint64_t inst, uint32_t sig) 637 { 638 assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE); 639 return QPU_UPDATE_FIELD(inst, sig, QPU_SIG); 640 } 641 642 uint64_t 643 qpu_set_cond_add(uint64_t inst, uint32_t cond) 644 { 645 assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS); 646 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD); 647 } 648 649 uint64_t 650 qpu_set_cond_mul(uint64_t inst, uint32_t cond) 651 { 652 assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS); 653 return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL); 654 } 655 656 bool 657 qpu_waddr_is_tlb(uint32_t waddr) 658 { 659 switch (waddr) { 660 case QPU_W_TLB_COLOR_ALL: 661 case QPU_W_TLB_COLOR_MS: 662 case QPU_W_TLB_Z: 663 return true; 664 default: 665 return false; 666 } 667 } 668 669 bool 670 qpu_inst_is_tlb(uint64_t inst) 671 { 672 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 673 674 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || 675 qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) || 676 sig == QPU_SIG_COLOR_LOAD || 677 sig == QPU_SIG_WAIT_FOR_SCOREBOARD); 678 } 679 680 /** 681 * Returns the small immediate value to be encoded in to the raddr b field if 682 * the argument can be represented as one, or ~0 otherwise. 683 */ 684 uint32_t 685 qpu_encode_small_immediate(uint32_t i) 686 { 687 if (i <= 15) 688 return i; 689 if ((int)i < 0 && (int)i >= -16) 690 return i + 32; 691 692 switch (i) { 693 case 0x3f800000: 694 return 32; 695 case 0x40000000: 696 return 33; 697 case 0x40800000: 698 return 34; 699 case 0x41000000: 700 return 35; 701 case 0x41800000: 702 return 36; 703 case 0x42000000: 704 return 37; 705 case 0x42800000: 706 return 38; 707 case 0x43000000: 708 return 39; 709 case 0x3b800000: 710 return 40; 711 case 0x3c000000: 712 return 41; 713 case 0x3c800000: 714 return 42; 715 case 0x3d000000: 716 return 43; 717 case 0x3d800000: 718 return 44; 719 case 0x3e000000: 720 return 45; 721 case 0x3e800000: 722 return 46; 723 case 0x3f000000: 724 return 47; 725 } 726 727 return ~0; 728 } 729 730 void 731 qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) 732 { 733 if (c->qpu_inst_count >= c->qpu_inst_size) { 734 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2); 735 c->qpu_insts = reralloc(c, c->qpu_insts, 736 uint64_t, c->qpu_inst_size); 737 } 738 c->qpu_insts[c->qpu_inst_count++] = inst; 739 } 740