/*
 * Copyright (c) 2013 Rob Clark <robdclark (at) gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

#define PACKED __attribute__((__packed__))

#include <stdint.h>
#include <stdbool.h>
#include <assert.h>

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Combine an instruction category and a per-category opcode into a single
 * opc_t value: the category sits above the low NOPC_BITS bits, the opcode
 * in the low NOPC_BITS bits.
 *
 * The category is scaled with a multiply rather than '<<' because the meta
 * instructions use category -1, and left-shifting a negative value is
 * undefined behavior in C.  Both arguments are fully parenthesized so that
 * arbitrary expressions can be passed.
 */
#define _OPC(cat, opc)   (((cat) * (1 << NOPC_BITS)) | (opc))

typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_BR              = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	/* category 1: */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U           = _OPC(2, 48),
	OPC_MUL_S           = _OPC(2, 49),
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	// 7-63 - invalid

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),   /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),   /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_STI             = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	OPC_META_PHI        = _OPC(-1, 1),
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO         = _OPC(-1, 2),
	OPC_META_FI         = _OPC(-1, 3),

} opc_t;

/* Split an opc_t back into its category and per-category opcode.
 *
 * NOTE: opc_cat() right-shifts the value as-is; for the meta opcodes the
 * value is negative, and the result of right-shifting a negative value is
 * implementation-defined in C (the usual arithmetic shift yields -1).
 */
#define opc_cat(opc)   ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)    ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;

/* Return the width in bits (8, 16 or 32) of the given type.  An unknown
 * type trips the assert and, when asserts are compiled out, returns 0.
 */
static inline uint32_t type_size(type_t type)
{
	switch (type) {
	case TYPE_F32:
	case TYPE_U32:
	case TYPE_S32:
		return 32;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		return 16;
	case TYPE_U8:
	case TYPE_S8:
		return 8;
	default:
		assert(0); /* invalid type */
		return 0;
	}
}

/* Non-zero if type is one of the floating point types. */
static inline int type_float(type_t type)
{
	return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero if type is one of the unsigned integer types. */
static inline int type_uint(type_t type)
{
	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero if type is one of the signed integer types. */
static inline int type_sint(type_t type)
{
	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp : 2;
		uint32_t num  : 10;
	};
	/* for immediate val: */
	int32_t  iim_val : 11;
	/* to make compiler happy: */
	uint32_t dummy32;
	uint32_t dummy10 : 10;
	int32_t  idummy10 : 10;
	uint32_t dummy11 : 11;
	uint32_t dummy12 : 12;
	uint32_t dummy13 : 13;
	uint32_t dummy8  : 8;
} reg_t;

/* special registers: */
#define REG_A0 61       /* address register */
#define REG_P0 62       /* predicate register */

/* Non-zero if reg names the address (a0) or predicate (p0) register. */
static inline int reg_special(reg_t reg)
{
	return (reg.num == REG_A0) || (reg.num == REG_P0);
}

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			uint32_t immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat0_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like an address-relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;

/* Returns false for the cat3 opcodes listed below (the 16-bit variants
 * plus sad.s32) and true for every other cat3 opcode.
 */
static inline bool instr_cat3_full(instr_cat3_t *cat3)
{
	switch (_OPC(3, cat3->opc)) {
	case OPC_MAD_F16:
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_SEL_B16:
	case OPC_SEL_S16:
	case OPC_SEL_F16:
	case OPC_SAD_S16:
	case OPC_SAD_S32:  // really??
		return false;
	default:
		return true;
	}
}

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;

typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;

/* dword0 encoding for src_off: [src1 + off], src2: */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;

/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;

/* dword1 encoding for dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t  off      : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;

/* dword1 encoding for !dst_off: */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;

/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;   // type
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10;  // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;

/* stgb, pad0=0, pad3=2
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;   // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;   // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10;  // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;

typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;   /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;

typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;   /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;   /* write */
	uint32_t r        : 1;   /* read */
	uint32_t l        : 1;   /* local */
	uint32_t g        : 1;   /* global */
	uint32_t opc      : 4;   /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;   /* (jp) */
	uint32_t sync     : 1;   /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;

typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		uint64_t pad1     : 40;
		uint32_t repeat   : 3;   /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;   /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;   /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;

/* Return the per-category opcode field of instr, selected by its opc_cat.
 * Category 1 has no opc field in its encoding, so 0 is returned for it.
 */
static inline uint32_t instr_opc(instr_t *instr)
{
	switch (instr->opc_cat) {
	case 0:  return instr->cat0.opc;
	case 1:  return 0;
	case 2:  return instr->cat2.opc;
	case 3:  return instr->cat3.opc;
	case 4:  return instr->cat4.opc;
	case 5:  return instr->cat5.opc;
	case 6:  return instr->cat6.opc;
	case 7:  return instr->cat7.opc;
	default: return 0;
	}
}

/* True if opc is one of the cat3 mad.* opcodes (madsh.* excluded). */
static inline bool is_mad(opc_t opc)
{
	switch (opc) {
	case OPC_MAD_U16:
	case OPC_MAD_S16:
	case OPC_MAD_U24:
	case OPC_MAD_S24:
	case OPC_MAD_F16:
	case OPC_MAD_F32:
		return true;
	default:
		return false;
	}
}

/* True if opc is one of the cat3 madsh.* opcodes. */
static inline bool is_madsh(opc_t opc)
{
	switch (opc) {
	case OPC_MADSH_U16:
	case OPC_MADSH_M16:
		return true;
	default:
		return false;
	}
}

/* True if opc is one of the cat6 atomic.* opcodes. */
static inline bool is_atomic(opc_t opc)
{
	switch (opc) {
	case OPC_ATOMIC_ADD:
	case OPC_ATOMIC_SUB:
	case OPC_ATOMIC_XCHG:
	case OPC_ATOMIC_INC:
	case OPC_ATOMIC_DEC:
	case OPC_ATOMIC_CMPXCHG:
	case OPC_ATOMIC_MIN:
	case OPC_ATOMIC_MAX:
	case OPC_ATOMIC_AND:
	case OPC_ATOMIC_OR:
	case OPC_ATOMIC_XOR:
		return true;
	default:
		return false;
	}
}

/* True if opc is resfmt, resinfo, ldgb, stgb or stib. */
static inline bool is_ssbo(opc_t opc)
{
	switch (opc) {
	case OPC_RESFMT:
	case OPC_RESINFO:
	case OPC_LDGB:
	case OPC_STGB:
	case OPC_STIB:
		return true;
	default:
		return false;
	}
}

#endif /* INSTR_A3XX_H_ */