Home | History | Annotate | Download | only in ir3
      1 /*
      2  * Copyright (c) 2013 Rob Clark <robdclark (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #ifndef INSTR_A3XX_H_
     25 #define INSTR_A3XX_H_
     26 
/*
 * Applied to every instruction struct/union in this header so that the
 * compiler adds no padding and the bitfields describe the raw instruction
 * words.  Uses the GCC/Clang attribute syntax.
 */
#define PACKED __attribute__((__packed__))
     28 
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
     31 
     32 /* size of largest OPC field of all the instruction categories: */
     33 #define NOPC_BITS 6
     34 
     35 #define _OPC(cat, opc)   (((cat) << NOPC_BITS) | opc)
     36 
/*
 * Every opcode known to this header.  Each enumerator is built with
 * _OPC(category, number), so the instruction category and the opcode
 * number within that category can be recovered with opc_cat()/opc_op().
 * Gaps in the numbering are called out where they occur.
 */
typedef enum {
	/* category 0: */
	OPC_NOP             = _OPC(0, 0),
	OPC_BR              = _OPC(0, 1),
	OPC_JUMP            = _OPC(0, 2),
	OPC_CALL            = _OPC(0, 3),
	OPC_RET             = _OPC(0, 4),
	OPC_KILL            = _OPC(0, 5),
	OPC_END             = _OPC(0, 6),
	OPC_EMIT            = _OPC(0, 7),
	OPC_CUT             = _OPC(0, 8),
	OPC_CHMASK          = _OPC(0, 9),
	OPC_CHSH            = _OPC(0, 10),
	OPC_FLOW_REV        = _OPC(0, 11),

	/* category 1 (a single opcode): */
	OPC_MOV             = _OPC(1, 0),

	/* category 2: */
	OPC_ADD_F           = _OPC(2, 0),
	OPC_MIN_F           = _OPC(2, 1),
	OPC_MAX_F           = _OPC(2, 2),
	OPC_MUL_F           = _OPC(2, 3),
	OPC_SIGN_F          = _OPC(2, 4),
	OPC_CMPS_F          = _OPC(2, 5),
	OPC_ABSNEG_F        = _OPC(2, 6),
	OPC_CMPV_F          = _OPC(2, 7),
	/* 8 - invalid */
	OPC_FLOOR_F         = _OPC(2, 9),
	OPC_CEIL_F          = _OPC(2, 10),
	OPC_RNDNE_F         = _OPC(2, 11),
	OPC_RNDAZ_F         = _OPC(2, 12),
	OPC_TRUNC_F         = _OPC(2, 13),
	/* 14-15 - invalid */
	OPC_ADD_U           = _OPC(2, 16),
	OPC_ADD_S           = _OPC(2, 17),
	OPC_SUB_U           = _OPC(2, 18),
	OPC_SUB_S           = _OPC(2, 19),
	OPC_CMPS_U          = _OPC(2, 20),
	OPC_CMPS_S          = _OPC(2, 21),
	OPC_MIN_U           = _OPC(2, 22),
	OPC_MIN_S           = _OPC(2, 23),
	OPC_MAX_U           = _OPC(2, 24),
	OPC_MAX_S           = _OPC(2, 25),
	OPC_ABSNEG_S        = _OPC(2, 26),
	/* 27 - invalid */
	OPC_AND_B           = _OPC(2, 28),
	OPC_OR_B            = _OPC(2, 29),
	OPC_NOT_B           = _OPC(2, 30),
	OPC_XOR_B           = _OPC(2, 31),
	/* 32 - invalid */
	OPC_CMPV_U          = _OPC(2, 33),
	OPC_CMPV_S          = _OPC(2, 34),
	/* 35-47 - invalid */
	OPC_MUL_U           = _OPC(2, 48),
	OPC_MUL_S           = _OPC(2, 49),
	OPC_MULL_U          = _OPC(2, 50),
	OPC_BFREV_B         = _OPC(2, 51),
	OPC_CLZ_S           = _OPC(2, 52),
	OPC_CLZ_B           = _OPC(2, 53),
	OPC_SHL_B           = _OPC(2, 54),
	OPC_SHR_B           = _OPC(2, 55),
	OPC_ASHR_B          = _OPC(2, 56),
	OPC_BARY_F          = _OPC(2, 57),
	OPC_MGEN_B          = _OPC(2, 58),
	OPC_GETBIT_B        = _OPC(2, 59),
	OPC_SETRM           = _OPC(2, 60),
	OPC_CBITS_B         = _OPC(2, 61),
	OPC_SHB             = _OPC(2, 62),
	OPC_MSAD            = _OPC(2, 63),

	/* category 3: */
	OPC_MAD_U16         = _OPC(3, 0),
	OPC_MADSH_U16       = _OPC(3, 1),
	OPC_MAD_S16         = _OPC(3, 2),
	OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
	OPC_MAD_U24         = _OPC(3, 4),
	OPC_MAD_S24         = _OPC(3, 5),
	OPC_MAD_F16         = _OPC(3, 6),
	OPC_MAD_F32         = _OPC(3, 7),
	OPC_SEL_B16         = _OPC(3, 8),
	OPC_SEL_B32         = _OPC(3, 9),
	OPC_SEL_S16         = _OPC(3, 10),
	OPC_SEL_S32         = _OPC(3, 11),
	OPC_SEL_F16         = _OPC(3, 12),
	OPC_SEL_F32         = _OPC(3, 13),
	OPC_SAD_S16         = _OPC(3, 14),
	OPC_SAD_S32         = _OPC(3, 15),

	/* category 4: */
	OPC_RCP             = _OPC(4, 0),
	OPC_RSQ             = _OPC(4, 1),
	OPC_LOG2            = _OPC(4, 2),
	OPC_EXP2            = _OPC(4, 3),
	OPC_SIN             = _OPC(4, 4),
	OPC_COS             = _OPC(4, 5),
	OPC_SQRT            = _OPC(4, 6),
	/* 7-63 - invalid */

	/* category 5: */
	OPC_ISAM            = _OPC(5, 0),
	OPC_ISAML           = _OPC(5, 1),
	OPC_ISAMM           = _OPC(5, 2),
	OPC_SAM             = _OPC(5, 3),
	OPC_SAMB            = _OPC(5, 4),
	OPC_SAML            = _OPC(5, 5),
	OPC_SAMGQ           = _OPC(5, 6),
	OPC_GETLOD          = _OPC(5, 7),
	OPC_CONV            = _OPC(5, 8),
	OPC_CONVM           = _OPC(5, 9),
	OPC_GETSIZE         = _OPC(5, 10),
	OPC_GETBUF          = _OPC(5, 11),
	OPC_GETPOS          = _OPC(5, 12),
	OPC_GETINFO         = _OPC(5, 13),
	OPC_DSX             = _OPC(5, 14),
	OPC_DSY             = _OPC(5, 15),
	OPC_GATHER4R        = _OPC(5, 16),
	OPC_GATHER4G        = _OPC(5, 17),
	OPC_GATHER4B        = _OPC(5, 18),
	OPC_GATHER4A        = _OPC(5, 19),
	OPC_SAMGP0          = _OPC(5, 20),
	OPC_SAMGP1          = _OPC(5, 21),
	OPC_SAMGP2          = _OPC(5, 22),
	OPC_SAMGP3          = _OPC(5, 23),
	OPC_DSXPP_1         = _OPC(5, 24),
	OPC_DSYPP_1         = _OPC(5, 25),
	OPC_RGETPOS         = _OPC(5, 26),
	OPC_RGETINFO        = _OPC(5, 27),

	/* category 6: */
	OPC_LDG             = _OPC(6, 0),        /* load-global */
	OPC_LDL             = _OPC(6, 1),
	OPC_LDP             = _OPC(6, 2),
	OPC_STG             = _OPC(6, 3),        /* store-global */
	OPC_STL             = _OPC(6, 4),
	OPC_STP             = _OPC(6, 5),
	OPC_STI             = _OPC(6, 6),
	OPC_G2L             = _OPC(6, 7),
	OPC_L2G             = _OPC(6, 8),
	OPC_PREFETCH        = _OPC(6, 9),
	OPC_LDLW            = _OPC(6, 10),
	OPC_STLW            = _OPC(6, 11),
	/* 12-13 - no opcode listed here */
	OPC_RESFMT          = _OPC(6, 14),
	OPC_RESINFO         = _OPC(6, 15),
	OPC_ATOMIC_ADD      = _OPC(6, 16),
	OPC_ATOMIC_SUB      = _OPC(6, 17),
	OPC_ATOMIC_XCHG     = _OPC(6, 18),
	OPC_ATOMIC_INC      = _OPC(6, 19),
	OPC_ATOMIC_DEC      = _OPC(6, 20),
	OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
	OPC_ATOMIC_MIN      = _OPC(6, 22),
	OPC_ATOMIC_MAX      = _OPC(6, 23),
	OPC_ATOMIC_AND      = _OPC(6, 24),
	OPC_ATOMIC_OR       = _OPC(6, 25),
	OPC_ATOMIC_XOR      = _OPC(6, 26),
	OPC_LDGB            = _OPC(6, 27),
	OPC_STGB            = _OPC(6, 28),
	OPC_STIB            = _OPC(6, 29),
	OPC_LDC             = _OPC(6, 30),
	OPC_LDLV            = _OPC(6, 31),

	/* category 7: */
	OPC_BAR             = _OPC(7, 0),
	OPC_FENCE           = _OPC(7, 1),

	/* meta instructions (category -1): */
	/* placeholder instr to mark shader inputs: */
	OPC_META_INPUT      = _OPC(-1, 0),
	OPC_META_PHI        = _OPC(-1, 1),
	/* The "fan-in" and "fan-out" instructions are used for keeping
	 * track of instructions that write to multiple dst registers
	 * (fan-out) like texture sample instructions, or read multiple
	 * consecutive scalar registers (fan-in) (bary.f, texture samp)
	 */
	OPC_META_FO         = _OPC(-1, 2),
	OPC_META_FI         = _OPC(-1, 3),

} opc_t;
    215 
/*
 * Split an opc_t value back into the two parts combined by _OPC():
 *   opc_cat() - the instruction category (the bits above the low
 *               NOPC_BITS bits), as an int
 *   opc_op()  - the opcode number within the category (the low
 *               NOPC_BITS bits), as an unsigned
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))
    218 
/*
 * Operand data types.  The values fit in the 3-bit type fields of the
 * instruction encodings below (e.g. cat1 src_type/dst_type).  Naming:
 * F = float, U = unsigned integer, S = signed integer, followed by the
 * width in bits (see type_size(), type_float(), type_uint(), type_sint()).
 */
typedef enum {
	TYPE_F16 = 0,
	TYPE_F32 = 1,
	TYPE_U16 = 2,
	TYPE_U32 = 3,
	TYPE_S16 = 4,
	TYPE_S32 = 5,
	TYPE_U8  = 6,
	TYPE_S8  = 7,  // XXX I assume?
} type_t;
    229 
    230 static inline uint32_t type_size(type_t type)
    231 {
    232 	switch (type) {
    233 	case TYPE_F32:
    234 	case TYPE_U32:
    235 	case TYPE_S32:
    236 		return 32;
    237 	case TYPE_F16:
    238 	case TYPE_U16:
    239 	case TYPE_S16:
    240 		return 16;
    241 	case TYPE_U8:
    242 	case TYPE_S8:
    243 		return 8;
    244 	default:
    245 		assert(0); /* invalid type */
    246 		return 0;
    247 	}
    248 }
    249 
    250 static inline int type_float(type_t type)
    251 {
    252 	return (type == TYPE_F32) || (type == TYPE_F16);
    253 }
    254 
    255 static inline int type_uint(type_t type)
    256 {
    257 	return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
    258 }
    259 
    260 static inline int type_sint(type_t type)
    261 {
    262 	return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
    263 }
    264 
/*
 * A register/immediate operand as it appears in an instruction field.
 * All members overlay the same storage; the caller picks the view that
 * matches the width and meaning of the field being encoded or decoded.
 */
typedef union PACKED {
	/* normal gpr or const src register: */
	struct PACKED {
		uint32_t comp  : 2;   /* presumably the component within the register - confirm */
		uint32_t num   : 10;  /* register number; REG_A0 / REG_P0 are special (see reg_special()) */
	};
	/* for immediate val: */
	int32_t  iim_val   : 11;
	/* to make compiler happy: */
	/* (raw views of the same bits at the various field widths) */
	uint32_t dummy32;
	uint32_t dummy10   : 10;
	int32_t  idummy10  : 10;
	uint32_t dummy11   : 11;
	uint32_t dummy12   : 12;
	uint32_t dummy13   : 13;
	uint32_t dummy8    : 8;
} reg_t;
    282 
    283 /* special registers: */
    284 #define REG_A0 61       /* address register */
    285 #define REG_P0 62       /* predicate register */
    286 
    287 static inline int reg_special(reg_t reg)
    288 {
    289 	return (reg.num == REG_A0) || (reg.num == REG_P0);
    290 }
    291 
/*
 * Encoding of a category 0 instruction (the OPC_NOP .. OPC_FLOW_REV
 * group): two 32-bit dwords.  dword1 adds up to 32 bits
 * (8+3+1+1+7+1+2+4+1+1+3).
 */
typedef struct PACKED {
	/* dword0: */
	/* immediate operand; its width depends on the GPU generation
	 * (16 bits on a3xx, 20 on a4xx, 32 on a5xx)
	 */
	union PACKED {
		struct PACKED {
			int16_t  immed    : 16;
			uint32_t dummy1   : 16;
		} a3xx;
		struct PACKED {
			int32_t  immed    : 20;
			uint32_t dummy1   : 12;
		} a4xx;
		struct PACKED {
			uint32_t immed    : 32;
		} a5xx;
	};

	/* dword1: */
	uint32_t dummy2   : 8;
	uint32_t repeat   : 3;
	uint32_t dummy3   : 1;
	uint32_t ss       : 1;
	uint32_t dummy4   : 7;
	uint32_t inv      : 1;
	uint32_t comp     : 2;
	uint32_t opc      : 4;   /* opcode number within category 0 */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;   /* instruction category, top 3 bits in every cat */
} instr_cat0_t;
    321 
/*
 * Encoding of a category 1 instruction (OPC_MOV): two 32-bit dwords.
 * dword0 holds the source in one of three overlapping forms (register,
 * address-relative, or a full 32-bit immediate); dword1 holds the
 * destination, the src/dst types and the common flag bits.  There is no
 * opc field: category 1 has a single opcode.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* for normal src register: */
		struct PACKED {
			uint32_t src : 11;
			/* at least low bit of pad must be zero or it will
			 * look like an address relative src
			 */
			uint32_t pad : 21;
		};
		/* for address relative: */
		struct PACKED {
			int32_t  off : 10;
			uint32_t src_rel_c : 1;
			uint32_t src_rel : 1;
			uint32_t unknown : 20;
		};
		/* for immediate: */
		/* (same 32 bits viewed as signed, unsigned or float) */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
	};

	/* dword1: */
	uint32_t dst        : 8;
	uint32_t repeat     : 3;
	uint32_t src_r      : 1;
	uint32_t ss         : 1;
	uint32_t ul         : 1;
	uint32_t dst_type   : 3;   /* 3 bits, wide enough for a type_t value */
	uint32_t dst_rel    : 1;
	uint32_t src_type   : 3;   /* 3 bits, wide enough for a type_t value */
	uint32_t src_c      : 1;
	uint32_t src_im     : 1;
	uint32_t even       : 1;
	uint32_t pos_inf    : 1;
	uint32_t must_be_0  : 2;
	uint32_t jmp_tgt    : 1;
	uint32_t sync       : 1;
	uint32_t opc_cat    : 3;
} instr_cat1_t;
    364 
/*
 * Encoding of a category 2 instruction (OPC_ADD_F .. OPC_MSAD): two
 * 32-bit dwords.  dword0 is split into two 16-bit halves, one per source
 * operand; each half can be viewed in three ways (plain register or
 * immediate with modifiers, address-relative "rel", or const "c").
 * dword1 holds the destination, flags and the 6-bit opcode.
 */
typedef struct PACKED {
	/* dword0: */
	/* low 16 bits: src1 */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src1_im      : 1;   /* immediate */
			uint32_t src1_neg     : 1;   /* negate */
			uint32_t src1_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;   /* relative-const */
			uint32_t src1_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c1;
	};

	/* high 16 bits: src2, same three views as src1 */
	union PACKED {
		struct PACKED {
			uint32_t src2         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src2_im      : 1;   /* immediate */
			uint32_t src2_neg     : 1;   /* negate */
			uint32_t src2_abs     : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src2         : 10;
			uint32_t src2_c       : 1;   /* relative-const */
			uint32_t src2_rel     : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src2         : 12;
			uint32_t src2_c       : 1;   /* const */
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;   /* dunno */
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t ei       : 1;
	uint32_t cond     : 3;
	uint32_t src2_r   : 1;
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;   /* opcode number within category 2 */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat2_t;
    427 
/*
 * Encoding of a category 3 (three-source) instruction (OPC_MAD_* ,
 * OPC_MADSH_*, OPC_SEL_*, OPC_SAD_*): two 32-bit dwords.  dword0 carries
 * src1 in its low 16 bits and src3 in its high 16 bits, each with a plain,
 * address-relative ("rel") and const ("c") view; src2 lives in dword1.
 * Note that some src2 modifier bits (src2_c, src2_r, src2_neg) are placed
 * in the src1/src3 halves of dword0.
 */
typedef struct PACKED {
	/* dword0: */
	/* low 16 bits: src1 (plus two src2 flags) */
	union PACKED {
		struct PACKED {
			uint32_t src1         : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src2_c       : 1;
			uint32_t src1_neg     : 1;
			uint32_t src2_r       : 1;
		};
		struct PACKED {
			uint32_t src1         : 10;
			uint32_t src1_c       : 1;
			uint32_t src1_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel1;
		struct PACKED {
			uint32_t src1         : 12;
			uint32_t src1_c       : 1;
			uint32_t dummy        : 3;
		} c1;
	};

	/* high 16 bits: src3 (plus the src2 negate flag) */
	union PACKED {
		struct PACKED {
			uint32_t src3         : 11;
			uint32_t must_be_zero2: 2;
			uint32_t src3_r       : 1;
			uint32_t src2_neg     : 1;
			uint32_t src3_neg     : 1;
		};
		struct PACKED {
			uint32_t src3         : 10;
			uint32_t src3_c       : 1;
			uint32_t src3_rel     : 1;
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel2;
		struct PACKED {
			uint32_t src3         : 12;
			uint32_t src3_c       : 1;
			uint32_t dummy        : 3;
		} c2;
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src1_r   : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t src2     : 8;
	uint32_t opc      : 4;   /* opcode number within category 3 */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat3_t;
    487 
    488 static inline bool instr_cat3_full(instr_cat3_t *cat3)
    489 {
    490 	switch (_OPC(3, cat3->opc)) {
    491 	case OPC_MAD_F16:
    492 	case OPC_MAD_U16:
    493 	case OPC_MAD_S16:
    494 	case OPC_SEL_B16:
    495 	case OPC_SEL_S16:
    496 	case OPC_SEL_F16:
    497 	case OPC_SAD_S16:
    498 	case OPC_SAD_S32:  // really??
    499 		return false;
    500 	default:
    501 		return true;
    502 	}
    503 }
    504 
/*
 * Encoding of a category 4 (single-source) instruction (OPC_RCP ..
 * OPC_SQRT): two 32-bit dwords.  Only the low 16 bits of dword0 are
 * used, for the source operand in its plain, address-relative ("rel") or
 * const ("c") view.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		struct PACKED {
			uint32_t src          : 11;
			uint32_t must_be_zero1: 2;
			uint32_t src_im       : 1;   /* immediate */
			uint32_t src_neg      : 1;   /* negate */
			uint32_t src_abs      : 1;   /* absolute value */
		};
		struct PACKED {
			uint32_t src          : 10;
			uint32_t src_c        : 1;   /* relative-const */
			uint32_t src_rel      : 1;   /* relative address */
			uint32_t must_be_zero : 1;
			uint32_t dummy        : 3;
		} rel;
		struct PACKED {
			uint32_t src          : 12;
			uint32_t src_c        : 1;   /* const */
			uint32_t dummy        : 3;
		} c;
	};
	uint32_t dummy1   : 16;  /* seem to be ignored */

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t repeat   : 3;
	uint32_t src_r    : 1;
	uint32_t ss       : 1;
	uint32_t ul       : 1;
	uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
	uint32_t dummy2   : 5;   /* seem to be ignored */
	uint32_t full     : 1;   /* not half */
	uint32_t opc      : 6;   /* opcode number within category 4 */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat4_t;
    544 
/*
 * Encoding of a category 5 instruction (OPC_ISAM .. OPC_RGETINFO):
 * two 32-bit dwords.  dword0 has two layouts, "norm" and "s2en"; the
 * names suggest the is_s2en flag in dword1 selects between them, which
 * this header does not itself show - confirm.  The leading full/src1
 * bits are believed to be common to both layouts.
 */
typedef struct PACKED {
	/* dword0: */
	union PACKED {
		/* normal case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 8;
			uint32_t dummy1   : 4;   /* seem to be ignored */
			uint32_t samp     : 4;
			uint32_t tex      : 7;
		} norm;
		/* s2en case: */
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t src2     : 11;
			uint32_t dummy1   : 1;
			uint32_t src3     : 8;
			uint32_t dummy2   : 3;
		} s2en;
		/* same in either case: */
		// XXX I think, confirm this
		struct PACKED {
			uint32_t full     : 1;   /* not half */
			uint32_t src1     : 8;
			uint32_t pad      : 23;
		};
	};

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t wrmask   : 4;   /* write-mask */
	uint32_t type     : 3;   /* 3 bits, wide enough for a type_t value */
	uint32_t dummy2   : 1;   /* seems to be ignored */
	uint32_t is_3d    : 1;

	/* single-bit instruction flags */
	uint32_t is_a     : 1;
	uint32_t is_s     : 1;
	uint32_t is_s2en  : 1;
	uint32_t is_o     : 1;
	uint32_t is_p     : 1;

	uint32_t opc      : 5;   /* opcode number within category 5 */
	uint32_t jmp_tgt  : 1;
	uint32_t sync     : 1;
	uint32_t opc_cat  : 3;
} instr_cat5_t;
    593 
/* dword0 encoding for src_off: [src1 + off], src2: */
/* The first bit (mustbe1) overlays instr_cat6_t's src_off flag; dword1 is
 * left opaque here and decoded through the other cat6 views.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;
	int32_t  off      : 13;  /* signed offset added to src1 */
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6a_t;
    607 
/* dword0 encoding for !src_off: [src1], src2 */
/* The first bit (mustbe0) overlays instr_cat6_t's src_off flag; dword1 is
 * left opaque here and decoded through the other cat6 views.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe0  : 1;
	uint32_t src1     : 13;
	uint32_t ignore0  : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dword1;
} instr_cat6b_t;
    621 
/* dword1 encoding for dst_off: */
/* Bit 8 of dword1 (mustbe1) overlays instr_cat6_t's dst_off flag; dword0
 * is left opaque here.  pad1 covers the remaining dword1 bits.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* note: there is some weird stuff going on where sometimes
	 * cat6->a.off is involved.. but that seems like a bug in
	 * the blob, since it is used even if !cat6->src_off
	 * It would make sense for there to be some more bits to
	 * bring us to 11 bits worth of offset, but not sure..
	 */
	int32_t off       : 8;
	uint32_t mustbe1  : 1;
	uint32_t dst      : 8;
	uint32_t pad1     : 15;
} instr_cat6c_t;
    638 
/* dword1 encoding for !dst_off: */
/* Bit 8 of dword1 (mustbe0) overlays instr_cat6_t's dst_off flag; dword0
 * is left opaque here.  pad0 covers the remaining dword1 bits.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t dword0;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t idx      : 8;
	uint32_t pad0     : 15;
} instr_cat6d_t;
    649 
/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * Full two-dword view.  pad2 and pad4 cover the type and
 * opc/jmp_tgt/sync/opc_cat fields that are named in instr_cat6_t.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad0     : 1;
	uint32_t src3     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t src1     : 8;
	uint32_t src1_im  : 1;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t dst      : 8;
	uint32_t mustbe0  : 1;
	uint32_t src_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t g        : 1;
	uint32_t pad3     : 1;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;
    677 
/* stgb, pad0=0, pad3=2
 *
 * NOTE(review): in this struct the first dword0 bit is named mustbe1 and
 * pad0 is the 9-bit field further down, so the "pad0=0" above presumably
 * refers to the 9-bit field - confirm.  pad3 is 2 bits wide here (it
 * spans the g and pad3 bits of the ldgb layout).
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t mustbe1  : 1;  // ???
	uint32_t src1     : 8;
	uint32_t d        : 2;
	uint32_t typed    : 1;
	uint32_t type_size : 2;
	uint32_t pad0     : 9;
	uint32_t src2_im  : 1;
	uint32_t src2     : 8;

	/* dword1: */
	uint32_t src3     : 8;
	uint32_t src3_im  : 1;
	uint32_t dst_ssbo : 8;
	uint32_t pad2     : 3;  // type
	uint32_t pad3     : 2;
	uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;
    699 
/*
 * Encoding of a category 6 instruction (OPC_LDG .. OPC_LDLV).  All members
 * overlay the same two dwords:
 *   a / b      - dword0 layouts with / without a source offset (src_off)
 *   c / d      - dword1 layouts with / without a destination offset (dst_off)
 *   ldgb, stgb - complete layouts for ldgb/atomics and stgb
 * The anonymous struct names the fields shared by all of them: the
 * src_off / dst_off selector bits, the type, and the opcode/flag bits.
 */
typedef union PACKED {
	instr_cat6a_t a;
	instr_cat6b_t b;
	instr_cat6c_t c;
	instr_cat6d_t d;
	instr_cat6ldgb_t ldgb;
	instr_cat6stgb_t stgb;
	struct PACKED {
		/* dword0: */
		uint32_t src_off  : 1;
		uint32_t pad1     : 31;

		/* dword1: */
		uint32_t pad2     : 8;
		uint32_t dst_off  : 1;
		uint32_t pad3     : 8;
		uint32_t type     : 3;
		uint32_t g        : 1;  /* or in some cases it means dst immed */
		uint32_t pad4     : 1;
		uint32_t opc      : 5;  /* opcode number within category 6 */
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;
	};
} instr_cat6_t;
    725 
/*
 * Encoding of a category 7 instruction (OPC_BAR, OPC_FENCE): two 32-bit
 * dwords.  dword0 is entirely padding; dword1 holds the w/r/l/g flags
 * and the opcode.
 */
typedef struct PACKED {
	/* dword0: */
	uint32_t pad1     : 32;

	/* dword1: */
	uint32_t pad2     : 12;
	uint32_t ss       : 1;  /* maybe in the encoding, but blob only uses (sy) */
	uint32_t pad3     : 6;
	uint32_t w        : 1;  /* write */
	uint32_t r        : 1;  /* read */
	uint32_t l        : 1;  /* local */
	uint32_t g        : 1;  /* global */
	uint32_t opc      : 4;  /* presumed, but only a couple known OPCs */
	uint32_t jmp_tgt  : 1;  /* (jp) */
	uint32_t sync     : 1;  /* (sy) */
	uint32_t opc_cat  : 3;
} instr_cat7_t;
    743 
/*
 * One 64-bit instruction, viewable as any of the per-category encodings.
 * The anonymous struct exposes the fields that sit at the same bit
 * position in several categories, so they can be read before the category
 * is known; opc_cat (the top 3 bits) tells which catN member applies.
 */
typedef union PACKED {
	instr_cat0_t cat0;
	instr_cat1_t cat1;
	instr_cat2_t cat2;
	instr_cat3_t cat3;
	instr_cat4_t cat4;
	instr_cat5_t cat5;
	instr_cat6_t cat6;
	instr_cat7_t cat7;
	struct PACKED {
		/* dword0: */
		/* pad1 is 40 bits: all of dword0 plus the low 8 bits of
		 * dword1; the fields below are the rest of dword1
		 */
		uint64_t pad1     : 40;
		uint32_t repeat   : 3;  /* cat0-cat4 */
		uint32_t pad2     : 1;
		uint32_t ss       : 1;  /* cat1-cat4 (cat0??) and cat7 (?) */
		uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
		uint32_t pad3     : 13;
		uint32_t jmp_tgt  : 1;
		uint32_t sync     : 1;
		uint32_t opc_cat  : 3;

	};
} instr_t;
    767 
    768 static inline uint32_t instr_opc(instr_t *instr)
    769 {
    770 	switch (instr->opc_cat) {
    771 	case 0:  return instr->cat0.opc;
    772 	case 1:  return 0;
    773 	case 2:  return instr->cat2.opc;
    774 	case 3:  return instr->cat3.opc;
    775 	case 4:  return instr->cat4.opc;
    776 	case 5:  return instr->cat5.opc;
    777 	case 6:  return instr->cat6.opc;
    778 	case 7:  return instr->cat7.opc;
    779 	default: return 0;
    780 	}
    781 }
    782 
    783 static inline bool is_mad(opc_t opc)
    784 {
    785 	switch (opc) {
    786 	case OPC_MAD_U16:
    787 	case OPC_MAD_S16:
    788 	case OPC_MAD_U24:
    789 	case OPC_MAD_S24:
    790 	case OPC_MAD_F16:
    791 	case OPC_MAD_F32:
    792 		return true;
    793 	default:
    794 		return false;
    795 	}
    796 }
    797 
    798 static inline bool is_madsh(opc_t opc)
    799 {
    800 	switch (opc) {
    801 	case OPC_MADSH_U16:
    802 	case OPC_MADSH_M16:
    803 		return true;
    804 	default:
    805 		return false;
    806 	}
    807 }
    808 
    809 static inline bool is_atomic(opc_t opc)
    810 {
    811 	switch (opc) {
    812 	case OPC_ATOMIC_ADD:
    813 	case OPC_ATOMIC_SUB:
    814 	case OPC_ATOMIC_XCHG:
    815 	case OPC_ATOMIC_INC:
    816 	case OPC_ATOMIC_DEC:
    817 	case OPC_ATOMIC_CMPXCHG:
    818 	case OPC_ATOMIC_MIN:
    819 	case OPC_ATOMIC_MAX:
    820 	case OPC_ATOMIC_AND:
    821 	case OPC_ATOMIC_OR:
    822 	case OPC_ATOMIC_XOR:
    823 		return true;
    824 	default:
    825 		return false;
    826 	}
    827 }
    828 
    829 static inline bool is_ssbo(opc_t opc)
    830 {
    831 	switch (opc) {
    832 	case OPC_RESFMT:
    833 	case OPC_RESINFO:
    834 	case OPC_LDGB:
    835 	case OPC_STGB:
    836 	case OPC_STIB:
    837 		return true;
    838 	default:
    839 		return false;
    840 	}
    841 }
    842 
    843 #endif /* INSTR_A3XX_H_ */
    844