Home | History | Annotate | Download | only in a2xx
      1 /*
      2  * Copyright (c) 2012 Rob Clark <robdclark (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #include "ir-a2xx.h"
     25 
     26 #include <stdlib.h>
     27 #include <stdio.h>
     28 #include <string.h>
     29 #include <assert.h>
     30 
     31 #include "freedreno_util.h"
     32 #include "instr-a2xx.h"
     33 
     34 #define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
     35 #define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
     36 #define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)
     37 
     38 #define REG_MASK 0x3f
     39 
     40 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);
     41 
     42 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
     43 		uint32_t idx, struct ir2_shader_info *info);
     44 
     45 static void reg_update_stats(struct ir2_register *reg,
     46 		struct ir2_shader_info *info, bool dest);
     47 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
     48 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
     49 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
     50 static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
     51 
     52 /* simple allocator to carve allocations out of an up-front allocated heap,
     53  * so that we can free everything easily in one shot.
     54  */
     55 static void * ir2_alloc(struct ir2_shader *shader, int sz)
     56 {
     57 	void *ptr = &shader->heap[shader->heap_idx];
     58 	shader->heap_idx += align(sz, 4);
     59 	return ptr;
     60 }
     61 
     62 static char * ir2_strdup(struct ir2_shader *shader, const char *str)
     63 {
     64 	char *ptr = NULL;
     65 	if (str) {
     66 		int len = strlen(str);
     67 		ptr = ir2_alloc(shader, len+1);
     68 		memcpy(ptr, str, len);
     69 		ptr[len] = '\0';
     70 	}
     71 	return ptr;
     72 }
     73 
     74 struct ir2_shader * ir2_shader_create(void)
     75 {
     76 	DEBUG_MSG("");
     77 	return calloc(1, sizeof(struct ir2_shader));
     78 }
     79 
     80 void ir2_shader_destroy(struct ir2_shader *shader)
     81 {
     82 	DEBUG_MSG("");
     83 	free(shader);
     84 }
     85 
     86 /* resolve addr/cnt/sequence fields in the individual CF's */
     87 static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
     88 {
     89 	uint32_t addr;
     90 	unsigned i;
     91 	int j;
     92 
     93 	addr = shader->cfs_count / 2;
     94 	for (i = 0; i < shader->cfs_count; i++) {
     95 		struct ir2_cf *cf = shader->cfs[i];
     96 		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
     97 			uint32_t sequence = 0;
     98 
     99 			if (cf->exec.addr && (cf->exec.addr != addr))
    100 				WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
    101 			if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
    102 				WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
    103 
    104 			for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
    105 				struct ir2_instruction *instr = cf->exec.instrs[j];
    106 				sequence <<= 2;
    107 				if (instr->instr_type == IR2_FETCH)
    108 					sequence |= 0x1;
    109 				if (instr->sync)
    110 					sequence |= 0x2;
    111 			}
    112 
    113 			cf->exec.addr = addr;
    114 			cf->exec.cnt  = cf->exec.instrs_count;
    115 			cf->exec.sequence = sequence;
    116 
    117 			addr += cf->exec.instrs_count;
    118 		}
    119 	}
    120 
    121 	info->sizedwords = 3 * addr;
    122 
    123 	return 0;
    124 }
    125 
    126 void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
    127 {
    128 	uint32_t i, j;
    129 	uint32_t *ptr, *dwords = NULL;
    130 	uint32_t idx = 0;
    131 	int ret;
    132 
    133 	info->sizedwords    = 0;
    134 	info->max_reg       = -1;
    135 	info->max_input_reg = 0;
    136 	info->regs_written  = 0;
    137 
    138 	/* we need an even # of CF's.. insert a NOP if needed */
    139 	if (shader->cfs_count != align(shader->cfs_count, 2))
    140 		ir2_cf_create(shader, NOP);
    141 
    142 	/* first pass, resolve sizes and addresses: */
    143 	ret = shader_resolve(shader, info);
    144 	if (ret) {
    145 		ERROR_MSG("resolve failed: %d", ret);
    146 		goto fail;
    147 	}
    148 
    149 	ptr = dwords = calloc(4, info->sizedwords);
    150 
    151 	/* second pass, emit CF program in pairs: */
    152 	for (i = 0; i < shader->cfs_count; i += 2) {
    153 		instr_cf_t *cfs = (instr_cf_t *)ptr;
    154 		ret = cf_emit(shader->cfs[i], &cfs[0]);
    155 		if (ret) {
    156 			ERROR_MSG("CF emit failed: %d\n", ret);
    157 			goto fail;
    158 		}
    159 		ret = cf_emit(shader->cfs[i+1], &cfs[1]);
    160 		if (ret) {
    161 			ERROR_MSG("CF emit failed: %d\n", ret);
    162 			goto fail;
    163 		}
    164 		ptr += 3;
    165 		assert((ptr - dwords) <= info->sizedwords);
    166 	}
    167 
    168 	/* third pass, emit ALU/FETCH: */
    169 	for (i = 0; i < shader->cfs_count; i++) {
    170 		struct ir2_cf *cf = shader->cfs[i];
    171 		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
    172 			for (j = 0; j < cf->exec.instrs_count; j++) {
    173 				ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
    174 				if (ret) {
    175 					ERROR_MSG("instruction emit failed: %d", ret);
    176 					goto fail;
    177 				}
    178 				ptr += 3;
    179 				assert((ptr - dwords) <= info->sizedwords);
    180 			}
    181 		}
    182 	}
    183 
    184 	return dwords;
    185 
    186 fail:
    187 	free(dwords);
    188 	return NULL;
    189 }
    190 
    191 
    192 struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
    193 {
    194 	struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf));
    195 	DEBUG_MSG("%d", cf_type);
    196 	cf->shader = shader;
    197 	cf->cf_type = cf_type;
    198 	assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
    199 	shader->cfs[shader->cfs_count++] = cf;
    200 	return cf;
    201 }
    202 
    203 
    204 /*
    205  * CF instructions:
    206  */
    207 
    208 static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
    209 {
    210 	memset(instr, 0, sizeof(*instr));
    211 
    212 	instr->opc = cf->cf_type;
    213 
    214 	switch (cf->cf_type) {
    215 	case NOP:
    216 		break;
    217 	case EXEC:
    218 	case EXEC_END:
    219 		assert(cf->exec.addr <= 0x1ff);
    220 		assert(cf->exec.cnt <= 0x6);
    221 		assert(cf->exec.sequence <= 0xfff);
    222 		instr->exec.address = cf->exec.addr;
    223 		instr->exec.count = cf->exec.cnt;
    224 		instr->exec.serialize = cf->exec.sequence;
    225 		break;
    226 	case ALLOC:
    227 		assert(cf->alloc.size <= 0xf);
    228 		instr->alloc.size = cf->alloc.size;
    229 		switch (cf->alloc.type) {
    230 		case SQ_POSITION:
    231 		case SQ_PARAMETER_PIXEL:
    232 			instr->alloc.buffer_select = cf->alloc.type;
    233 			break;
    234 		default:
    235 			ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
    236 			return -1;
    237 		}
    238 		break;
    239 	case COND_EXEC:
    240 	case COND_EXEC_END:
    241 	case COND_PRED_EXEC:
    242 	case COND_PRED_EXEC_END:
    243 	case LOOP_START:
    244 	case LOOP_END:
    245 	case COND_CALL:
    246 	case RETURN:
    247 	case COND_JMP:
    248 	case COND_EXEC_PRED_CLEAN:
    249 	case COND_EXEC_PRED_CLEAN_END:
    250 	case MARK_VS_FETCH_DONE:
    251 		ERROR_MSG("TODO");
    252 		return -1;
    253 	}
    254 
    255 	return 0;
    256 }
    257 
    258 
    259 struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
    260 {
    261 	struct ir2_instruction *instr =
    262 			ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
    263 	DEBUG_MSG("%d", instr_type);
    264 	instr->shader = cf->shader;
    265 	instr->pred = cf->shader->pred;
    266 	instr->instr_type = instr_type;
    267 	assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
    268 	cf->exec.instrs[cf->exec.instrs_count++] = instr;
    269 	return instr;
    270 }
    271 
    272 
    273 /*
    274  * FETCH instructions:
    275  */
    276 
    277 static int instr_emit_fetch(struct ir2_instruction *instr,
    278 		uint32_t *dwords, uint32_t idx,
    279 		struct ir2_shader_info *info)
    280 {
    281 	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
    282 	int reg = 0;
    283 	struct ir2_register *dst_reg = instr->regs[reg++];
    284 	struct ir2_register *src_reg = instr->regs[reg++];
    285 
    286 	memset(fetch, 0, sizeof(*fetch));
    287 
    288 	reg_update_stats(dst_reg, info, true);
    289 	reg_update_stats(src_reg, info, false);
    290 
    291 	fetch->opc = instr->fetch.opc;
    292 
    293 	if (instr->fetch.opc == VTX_FETCH) {
    294 		instr_fetch_vtx_t *vtx = &fetch->vtx;
    295 
    296 		assert(instr->fetch.stride <= 0xff);
    297 		assert(instr->fetch.fmt <= 0x3f);
    298 		assert(instr->fetch.const_idx <= 0x1f);
    299 		assert(instr->fetch.const_idx_sel <= 0x3);
    300 
    301 		vtx->src_reg = src_reg->num;
    302 		vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
    303 		vtx->dst_reg = dst_reg->num;
    304 		vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
    305 		vtx->must_be_one = 1;
    306 		vtx->const_index = instr->fetch.const_idx;
    307 		vtx->const_index_sel = instr->fetch.const_idx_sel;
    308 		vtx->format_comp_all = !!instr->fetch.is_signed;
    309 		vtx->num_format_all = !instr->fetch.is_normalized;
    310 		vtx->format = instr->fetch.fmt;
    311 		vtx->stride = instr->fetch.stride;
    312 		vtx->offset = instr->fetch.offset;
    313 
    314 		if (instr->pred != IR2_PRED_NONE) {
    315 			vtx->pred_select = 1;
    316 			vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
    317 		}
    318 
    319 		/* XXX seems like every FETCH but the first has
    320 		 * this bit set:
    321 		 */
    322 		vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
    323 		vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
    324 	} else if (instr->fetch.opc == TEX_FETCH) {
    325 		instr_fetch_tex_t *tex = &fetch->tex;
    326 
    327 		assert(instr->fetch.const_idx <= 0x1f);
    328 
    329 		tex->src_reg = src_reg->num;
    330 		tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
    331 		tex->dst_reg = dst_reg->num;
    332 		tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
    333 		tex->const_idx = instr->fetch.const_idx;
    334 		tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
    335 		tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
    336 		tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
    337 		tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
    338 		tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
    339 		tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
    340 		tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
    341 		tex->use_comp_lod = 1;
    342 		tex->use_reg_lod = !instr->fetch.is_cube;
    343 		tex->sample_location = SAMPLE_CENTER;
    344 
    345 		if (instr->pred != IR2_PRED_NONE) {
    346 			tex->pred_select = 1;
    347 			tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
    348 		}
    349 
    350 	} else {
    351 		ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
    352 		return -1;
    353 	}
    354 
    355 	return 0;
    356 }
    357 
    358 /*
    359  * ALU instructions:
    360  */
    361 
    362 static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
    363 		struct ir2_shader_info *info)
    364 {
    365 	int reg = 0;
    366 	instr_alu_t *alu = (instr_alu_t *)dwords;
    367 	struct ir2_register *dst_reg  = instr->regs[reg++];
    368 	struct ir2_register *src1_reg;
    369 	struct ir2_register *src2_reg;
    370 	struct ir2_register *src3_reg;
    371 
    372 	memset(alu, 0, sizeof(*alu));
    373 
    374 	/* handle instructions w/ 3 src operands: */
    375 	switch (instr->alu.vector_opc) {
    376 	case MULADDv:
    377 	case CNDEv:
    378 	case CNDGTEv:
    379 	case CNDGTv:
    380 	case DOT2ADDv:
    381 		/* note: disassembler lists 3rd src first, ie:
    382 		 *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
    383 		 * which is the reason for this strange ordering.
    384 		 */
    385 		src3_reg = instr->regs[reg++];
    386 		break;
    387 	default:
    388 		src3_reg = NULL;
    389 		break;
    390 	}
    391 
    392 	src1_reg = instr->regs[reg++];
    393 	src2_reg = instr->regs[reg++];
    394 
    395 	reg_update_stats(dst_reg, info, true);
    396 	reg_update_stats(src1_reg, info, false);
    397 	reg_update_stats(src2_reg, info, false);
    398 
    399 	assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
    400 	assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
    401 	assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
    402 	assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
    403 	assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
    404 	assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
    405 
    406 	if (instr->alu.vector_opc == (instr_vector_opc_t)~0) {
    407 		alu->vector_opc          = MAXv;
    408 		alu->vector_write_mask   = 0;
    409 	} else {
    410 		alu->vector_opc          = instr->alu.vector_opc;
    411 		alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
    412 	}
    413 
    414 	alu->vector_dest         = dst_reg->num;
    415 	alu->export_data         = !!(dst_reg->flags & IR2_REG_EXPORT);
    416 
    417 	// TODO predicate case/condition.. need to add to parser
    418 
    419 	alu->src2_reg            = src2_reg->num;
    420 	alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
    421 	alu->src2_reg_negate     = !!(src2_reg->flags & IR2_REG_NEGATE);
    422 	alu->src2_reg_abs        = !!(src2_reg->flags & IR2_REG_ABS);
    423 	alu->src2_sel            = !(src2_reg->flags & IR2_REG_CONST);
    424 
    425 	alu->src1_reg            = src1_reg->num;
    426 	alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
    427 	alu->src1_reg_negate     = !!(src1_reg->flags & IR2_REG_NEGATE);
    428 	alu->src1_reg_abs        = !!(src1_reg->flags & IR2_REG_ABS);
    429 	alu->src1_sel            = !(src1_reg->flags & IR2_REG_CONST);
    430 
    431 	alu->vector_clamp        = instr->alu.vector_clamp;
    432 	alu->scalar_clamp        = instr->alu.scalar_clamp;
    433 
    434 	if (instr->alu.scalar_opc != (instr_scalar_opc_t)~0) {
    435 		struct ir2_register *sdst_reg = instr->regs[reg++];
    436 
    437 		reg_update_stats(sdst_reg, info, true);
    438 
    439 		assert(sdst_reg->flags == dst_reg->flags);
    440 
    441 		if (src3_reg) {
    442 			assert(src3_reg == instr->regs[reg]);
    443 			reg++;
    444 		} else {
    445 			src3_reg = instr->regs[reg++];
    446 		}
    447 
    448 		alu->scalar_dest         = sdst_reg->num;
    449 		alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
    450 		alu->scalar_opc          = instr->alu.scalar_opc;
    451 	} else {
    452 		/* not sure if this is required, but adreno compiler seems
    453 		 * to always set scalar opc to MAXs if it is not used:
    454 		 */
    455 		alu->scalar_opc = MAXs;
    456 	}
    457 
    458 	if (src3_reg) {
    459 		reg_update_stats(src3_reg, info, false);
    460 
    461 		alu->src3_reg            = src3_reg->num;
    462 		alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
    463 		alu->src3_reg_negate     = !!(src3_reg->flags & IR2_REG_NEGATE);
    464 		alu->src3_reg_abs        = !!(src3_reg->flags & IR2_REG_ABS);
    465 		alu->src3_sel            = !(src3_reg->flags & IR2_REG_CONST);
    466 	} else {
    467 		/* not sure if this is required, but adreno compiler seems
    468 		 * to always set register bank for 3rd src if unused:
    469 		 */
    470 		alu->src3_sel = 1;
    471 	}
    472 
    473 	if (instr->pred != IR2_PRED_NONE) {
    474 		alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
    475 	}
    476 
    477 	return 0;
    478 }
    479 
    480 static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
    481 		uint32_t idx, struct ir2_shader_info *info)
    482 {
    483 	switch (instr->instr_type) {
    484 	case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
    485 	case IR2_ALU:   return instr_emit_alu(instr, dwords, info);
    486 	}
    487 	return -1;
    488 }
    489 
    490 
    491 struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
    492 		int num, const char *swizzle, int flags)
    493 {
    494 	struct ir2_register *reg =
    495 			ir2_alloc(instr->shader, sizeof(struct ir2_register));
    496 	DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
    497 	assert(num <= REG_MASK);
    498 	reg->flags = flags;
    499 	reg->num = num;
    500 	reg->swizzle = ir2_strdup(instr->shader, swizzle);
    501 	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
    502 	instr->regs[instr->regs_count++] = reg;
    503 	return reg;
    504 }
    505 
    506 static void reg_update_stats(struct ir2_register *reg,
    507 		struct ir2_shader_info *info, bool dest)
    508 {
    509 	if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
    510 		info->max_reg = MAX2(info->max_reg, reg->num);
    511 
    512 		if (dest) {
    513 			info->regs_written |= (1 << reg->num);
    514 		} else if (!(info->regs_written & (1 << reg->num))) {
    515 			/* for registers that haven't been written, they must be an
    516 			 * input register that the thread scheduler (presumably?)
    517 			 * needs to know about:
    518 			 */
    519 			info->max_input_reg = MAX2(info->max_input_reg, reg->num);
    520 		}
    521 	}
    522 }
    523 
    524 static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
    525 {
    526 	uint32_t swiz = 0;
    527 	int i;
    528 
    529 	assert(reg->flags == 0);
    530 	assert(reg->swizzle);
    531 
    532 	DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
    533 
    534 	for (i = n-1; i >= 0; i--) {
    535 		swiz <<= 2;
    536 		switch (reg->swizzle[i]) {
    537 		default:
    538 			ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
    539 		case 'x': swiz |= 0x0; break;
    540 		case 'y': swiz |= 0x1; break;
    541 		case 'z': swiz |= 0x2; break;
    542 		case 'w': swiz |= 0x3; break;
    543 		}
    544 	}
    545 
    546 	return swiz;
    547 }
    548 
    549 static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
    550 {
    551 	uint32_t swiz = 0;
    552 	int i;
    553 
    554 	assert(reg->flags == 0);
    555 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
    556 
    557 	DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
    558 
    559 	if (reg->swizzle) {
    560 		for (i = 3; i >= 0; i--) {
    561 			swiz <<= 3;
    562 			switch (reg->swizzle[i]) {
    563 			default:
    564 				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
    565 			case 'x': swiz |= 0x0; break;
    566 			case 'y': swiz |= 0x1; break;
    567 			case 'z': swiz |= 0x2; break;
    568 			case 'w': swiz |= 0x3; break;
    569 			case '0': swiz |= 0x4; break;
    570 			case '1': swiz |= 0x5; break;
    571 			case '_': swiz |= 0x7; break;
    572 			}
    573 		}
    574 	} else {
    575 		swiz = 0x688;
    576 	}
    577 
    578 	return swiz;
    579 }
    580 
    581 /* actually, a write-mask */
    582 static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
    583 {
    584 	uint32_t swiz = 0;
    585 	int i;
    586 
    587 	assert((reg->flags & ~IR2_REG_EXPORT) == 0);
    588 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
    589 
    590 	DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
    591 
    592 	if (reg->swizzle) {
    593 		for (i = 3; i >= 0; i--) {
    594 			swiz <<= 1;
    595 			if (reg->swizzle[i] == "xyzw"[i]) {
    596 				swiz |= 0x1;
    597 			} else if (reg->swizzle[i] != '_') {
    598 				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
    599 				break;
    600 			}
    601 		}
    602 	} else {
    603 		swiz = 0xf;
    604 	}
    605 
    606 	return swiz;
    607 }
    608 
    609 static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
    610 {
    611 	uint32_t swiz = 0;
    612 	int i;
    613 
    614 	assert((reg->flags & IR2_REG_EXPORT) == 0);
    615 	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
    616 
    617 	DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
    618 
    619 	if (reg->swizzle) {
    620 		for (i = 3; i >= 0; i--) {
    621 			swiz <<= 2;
    622 			switch (reg->swizzle[i]) {
    623 			default:
    624 				ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
    625 			case 'x': swiz |= (0x0 - i) & 0x3; break;
    626 			case 'y': swiz |= (0x1 - i) & 0x3; break;
    627 			case 'z': swiz |= (0x2 - i) & 0x3; break;
    628 			case 'w': swiz |= (0x3 - i) & 0x3; break;
    629 			}
    630 		}
    631 	} else {
    632 		swiz = 0x0;
    633 	}
    634 
    635 	return swiz;
    636 }
    637