1 /* 2 * Copyright (c) 2012 Rob Clark <robdclark (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #include "ir3.h" 25 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 #include <assert.h> 30 #include <stdbool.h> 31 #include <errno.h> 32 33 #include "util/ralloc.h" 34 35 #include "freedreno_util.h" 36 #include "instr-a3xx.h" 37 38 /* simple allocator to carve allocations out of an up-front allocated heap, 39 * so that we can free everything easily in one shot. 40 */ 41 void * ir3_alloc(struct ir3 *shader, int sz) 42 { 43 return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ 44 } 45 46 struct ir3 * ir3_create(struct ir3_compiler *compiler, 47 unsigned nin, unsigned nout) 48 { 49 struct ir3 *shader = rzalloc(compiler, struct ir3); 50 51 shader->compiler = compiler; 52 shader->ninputs = nin; 53 shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); 54 55 shader->noutputs = nout; 56 shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); 57 58 list_inithead(&shader->block_list); 59 list_inithead(&shader->array_list); 60 61 return shader; 62 } 63 64 void ir3_destroy(struct ir3 *shader) 65 { 66 /* TODO convert the dynamic array to ralloc too: */ 67 free(shader->indirects); 68 free(shader->predicates); 69 free(shader->baryfs); 70 free(shader->keeps); 71 free(shader->astc_srgb); 72 ralloc_free(shader); 73 } 74 75 #define iassert(cond) do { \ 76 if (!(cond)) { \ 77 assert(cond); \ 78 return -1; \ 79 } } while (0) 80 81 static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, 82 uint32_t repeat, uint32_t valid_flags) 83 { 84 reg_t val = { .dummy32 = 0 }; 85 86 if (reg->flags & ~valid_flags) { 87 debug_printf("INVALID FLAGS: %x vs %x\n", 88 reg->flags, valid_flags); 89 } 90 91 if (!(reg->flags & IR3_REG_R)) 92 repeat = 0; 93 94 if (reg->flags & IR3_REG_IMMED) { 95 val.iim_val = reg->iim_val; 96 } else { 97 unsigned components; 98 int16_t max; 99 100 if (reg->flags & IR3_REG_RELATIV) { 101 components = reg->size; 102 val.idummy10 = reg->array.offset; 103 max = (reg->array.offset + repeat + components - 1) >> 2; 104 } else { 105 components = util_last_bit(reg->wrmask); 106 val.comp = reg->num & 0x3; 107 val.num = reg->num >> 2; 108 max = (reg->num + repeat + components - 1) >> 2; 109 } 110 111 if (reg->flags & IR3_REG_CONST) { 112 info->max_const = MAX2(info->max_const, max); 113 } else if (val.num == 63) { 114 /* ignore writes to dummy register r63.x */ 115 } else if ((max != REG_A0) && (max != REG_P0)) { 116 if (reg->flags & IR3_REG_HALF) { 117 info->max_half_reg = MAX2(info->max_half_reg, max); 118 } else { 119 info->max_reg = MAX2(info->max_reg, max); 120 } 121 } 122 } 123 124 return val.dummy32; 125 } 126 127 static int emit_cat0(struct ir3_instruction *instr, void *ptr, 128 struct ir3_info *info) 129 { 130 instr_cat0_t *cat0 = ptr; 131 132 if (info->gpu_id >= 500) { 133 cat0->a5xx.immed = instr->cat0.immed; 134 } else if (info->gpu_id >= 400) { 135 cat0->a4xx.immed = instr->cat0.immed; 136 } else { 137 cat0->a3xx.immed = instr->cat0.immed; 138 } 139 cat0->repeat = instr->repeat; 140 cat0->ss = !!(instr->flags & IR3_INSTR_SS); 141 cat0->inv = instr->cat0.inv; 142 cat0->comp = instr->cat0.comp; 143 cat0->opc = instr->opc; 144 cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 145 cat0->sync = !!(instr->flags & IR3_INSTR_SY); 146 cat0->opc_cat = 0; 147 148 return 0; 149 } 150 151 static uint32_t type_flags(type_t type) 152 { 153 return (type_size(type) == 32) ? 0 : IR3_REG_HALF; 154 } 155 156 static int emit_cat1(struct ir3_instruction *instr, void *ptr, 157 struct ir3_info *info) 158 { 159 struct ir3_register *dst = instr->regs[0]; 160 struct ir3_register *src = instr->regs[1]; 161 instr_cat1_t *cat1 = ptr; 162 163 iassert(instr->regs_count == 2); 164 iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); 165 iassert((src->flags & IR3_REG_IMMED) || 166 !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); 167 168 if (src->flags & IR3_REG_IMMED) { 169 cat1->iim_val = src->iim_val; 170 cat1->src_im = 1; 171 } else if (src->flags & IR3_REG_RELATIV) { 172 cat1->off = reg(src, info, instr->repeat, 173 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); 174 cat1->src_rel = 1; 175 cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); 176 } else { 177 cat1->src = reg(src, info, instr->repeat, 178 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); 179 cat1->src_c = !!(src->flags & IR3_REG_CONST); 180 } 181 182 cat1->dst = reg(dst, info, instr->repeat, 183 IR3_REG_RELATIV | IR3_REG_EVEN | 184 IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); 185 cat1->repeat = instr->repeat; 186 cat1->src_r = !!(src->flags & IR3_REG_R); 187 cat1->ss = !!(instr->flags & IR3_INSTR_SS); 188 cat1->ul = !!(instr->flags & IR3_INSTR_UL); 189 cat1->dst_type = instr->cat1.dst_type; 190 cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); 191 cat1->src_type = instr->cat1.src_type; 192 cat1->even = !!(dst->flags & IR3_REG_EVEN); 193 cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); 194 cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 195 cat1->sync = !!(instr->flags & IR3_INSTR_SY); 196 cat1->opc_cat = 1; 197 198 return 0; 199 } 200 201 static int emit_cat2(struct ir3_instruction *instr, void *ptr, 202 struct ir3_info *info) 203 { 204 struct ir3_register *dst = instr->regs[0]; 205 struct ir3_register *src1 = instr->regs[1]; 206 struct ir3_register *src2 = instr->regs[2]; 207 instr_cat2_t *cat2 = ptr; 208 unsigned absneg = ir3_cat2_absneg(instr->opc); 209 210 iassert((instr->regs_count == 2) || (instr->regs_count == 3)); 211 212 if (src1->flags & IR3_REG_RELATIV) { 213 iassert(src1->array.offset < (1 << 10)); 214 cat2->rel1.src1 = reg(src1, info, instr->repeat, 215 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 216 IR3_REG_HALF | absneg); 217 cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 218 cat2->rel1.src1_rel = 1; 219 } else if (src1->flags & IR3_REG_CONST) { 220 iassert(src1->num < (1 << 12)); 221 cat2->c1.src1 = reg(src1, info, instr->repeat, 222 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 223 cat2->c1.src1_c = 1; 224 } else { 225 iassert(src1->num < (1 << 11)); 226 cat2->src1 = reg(src1, info, instr->repeat, 227 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 228 absneg); 229 } 230 cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); 231 cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 232 cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); 233 cat2->src1_r = !!(src1->flags & IR3_REG_R); 234 235 if (src2) { 236 iassert((src2->flags & IR3_REG_IMMED) || 237 !((src1->flags ^ src2->flags) & IR3_REG_HALF)); 238 239 if (src2->flags & IR3_REG_RELATIV) { 240 iassert(src2->array.offset < (1 << 10)); 241 cat2->rel2.src2 = reg(src2, info, instr->repeat, 242 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 243 IR3_REG_HALF | absneg); 244 cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); 245 cat2->rel2.src2_rel = 1; 246 } else if (src2->flags & IR3_REG_CONST) { 247 iassert(src2->num < (1 << 12)); 248 cat2->c2.src2 = reg(src2, info, instr->repeat, 249 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 250 cat2->c2.src2_c = 1; 251 } else { 252 iassert(src2->num < (1 << 11)); 253 cat2->src2 = reg(src2, info, instr->repeat, 254 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 255 absneg); 256 } 257 258 cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); 259 cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 260 cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); 261 cat2->src2_r = !!(src2->flags & IR3_REG_R); 262 } 263 264 cat2->dst = reg(dst, info, instr->repeat, 265 IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); 266 cat2->repeat = instr->repeat; 267 cat2->ss = !!(instr->flags & IR3_INSTR_SS); 268 cat2->ul = !!(instr->flags & IR3_INSTR_UL); 269 cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); 270 cat2->ei = !!(dst->flags & IR3_REG_EI); 271 cat2->cond = instr->cat2.condition; 272 cat2->full = ! (src1->flags & IR3_REG_HALF); 273 cat2->opc = instr->opc; 274 cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 275 cat2->sync = !!(instr->flags & IR3_INSTR_SY); 276 cat2->opc_cat = 2; 277 278 return 0; 279 } 280 281 static int emit_cat3(struct ir3_instruction *instr, void *ptr, 282 struct ir3_info *info) 283 { 284 struct ir3_register *dst = instr->regs[0]; 285 struct ir3_register *src1 = instr->regs[1]; 286 struct ir3_register *src2 = instr->regs[2]; 287 struct ir3_register *src3 = instr->regs[3]; 288 unsigned absneg = ir3_cat3_absneg(instr->opc); 289 instr_cat3_t *cat3 = ptr; 290 uint32_t src_flags = 0; 291 292 switch (instr->opc) { 293 case OPC_MAD_F16: 294 case OPC_MAD_U16: 295 case OPC_MAD_S16: 296 case OPC_SEL_B16: 297 case OPC_SEL_S16: 298 case OPC_SEL_F16: 299 case OPC_SAD_S16: 300 case OPC_SAD_S32: // really?? 301 src_flags |= IR3_REG_HALF; 302 break; 303 default: 304 break; 305 } 306 307 iassert(instr->regs_count == 4); 308 iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); 309 iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); 310 iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); 311 312 if (src1->flags & IR3_REG_RELATIV) { 313 iassert(src1->array.offset < (1 << 10)); 314 cat3->rel1.src1 = reg(src1, info, instr->repeat, 315 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 316 IR3_REG_HALF | absneg); 317 cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 318 cat3->rel1.src1_rel = 1; 319 } else if (src1->flags & IR3_REG_CONST) { 320 iassert(src1->num < (1 << 12)); 321 cat3->c1.src1 = reg(src1, info, instr->repeat, 322 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 323 cat3->c1.src1_c = 1; 324 } else { 325 iassert(src1->num < (1 << 11)); 326 cat3->src1 = reg(src1, info, instr->repeat, 327 IR3_REG_R | IR3_REG_HALF | absneg); 328 } 329 330 cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 331 cat3->src1_r = !!(src1->flags & IR3_REG_R); 332 333 cat3->src2 = reg(src2, info, instr->repeat, 334 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); 335 cat3->src2_c = !!(src2->flags & IR3_REG_CONST); 336 cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 337 cat3->src2_r = !!(src2->flags & IR3_REG_R); 338 339 340 if (src3->flags & IR3_REG_RELATIV) { 341 iassert(src3->array.offset < (1 << 10)); 342 cat3->rel2.src3 = reg(src3, info, instr->repeat, 343 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 344 IR3_REG_HALF | absneg); 345 cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); 346 cat3->rel2.src3_rel = 1; 347 } else if (src3->flags & IR3_REG_CONST) { 348 iassert(src3->num < (1 << 12)); 349 cat3->c2.src3 = reg(src3, info, instr->repeat, 350 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 351 cat3->c2.src3_c = 1; 352 } else { 353 iassert(src3->num < (1 << 11)); 354 cat3->src3 = reg(src3, info, instr->repeat, 355 IR3_REG_R | IR3_REG_HALF | absneg); 356 } 357 358 cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 359 cat3->src3_r = !!(src3->flags & IR3_REG_R); 360 361 cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 362 cat3->repeat = instr->repeat; 363 cat3->ss = !!(instr->flags & IR3_INSTR_SS); 364 cat3->ul = !!(instr->flags & IR3_INSTR_UL); 365 cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); 366 cat3->opc = instr->opc; 367 cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 368 cat3->sync = !!(instr->flags & IR3_INSTR_SY); 369 cat3->opc_cat = 3; 370 371 return 0; 372 } 373 374 static int emit_cat4(struct ir3_instruction *instr, void *ptr, 375 struct ir3_info *info) 376 { 377 struct ir3_register *dst = instr->regs[0]; 378 struct ir3_register *src = instr->regs[1]; 379 instr_cat4_t *cat4 = ptr; 380 381 iassert(instr->regs_count == 2); 382 383 if (src->flags & IR3_REG_RELATIV) { 384 iassert(src->array.offset < (1 << 10)); 385 cat4->rel.src = reg(src, info, instr->repeat, 386 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | 387 IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); 388 cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); 389 cat4->rel.src_rel = 1; 390 } else if (src->flags & IR3_REG_CONST) { 391 iassert(src->num < (1 << 12)); 392 cat4->c.src = reg(src, info, instr->repeat, 393 IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS | 394 IR3_REG_R | IR3_REG_HALF); 395 cat4->c.src_c = 1; 396 } else { 397 iassert(src->num < (1 << 11)); 398 cat4->src = reg(src, info, instr->repeat, 399 IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | 400 IR3_REG_R | IR3_REG_HALF); 401 } 402 403 cat4->src_im = !!(src->flags & IR3_REG_IMMED); 404 cat4->src_neg = !!(src->flags & IR3_REG_FNEG); 405 cat4->src_abs = !!(src->flags & IR3_REG_FABS); 406 cat4->src_r = !!(src->flags & IR3_REG_R); 407 408 cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 409 cat4->repeat = instr->repeat; 410 cat4->ss = !!(instr->flags & IR3_INSTR_SS); 411 cat4->ul = !!(instr->flags & IR3_INSTR_UL); 412 cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); 413 cat4->full = ! (src->flags & IR3_REG_HALF); 414 cat4->opc = instr->opc; 415 cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 416 cat4->sync = !!(instr->flags & IR3_INSTR_SY); 417 cat4->opc_cat = 4; 418 419 return 0; 420 } 421 422 static int emit_cat5(struct ir3_instruction *instr, void *ptr, 423 struct ir3_info *info) 424 { 425 struct ir3_register *dst = instr->regs[0]; 426 struct ir3_register *src1 = instr->regs[1]; 427 struct ir3_register *src2 = instr->regs[2]; 428 struct ir3_register *src3 = instr->regs[3]; 429 instr_cat5_t *cat5 = ptr; 430 431 iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); 432 433 assume(src1 || !src2); 434 assume(src2 || !src3); 435 436 if (src1) { 437 cat5->full = ! (src1->flags & IR3_REG_HALF); 438 cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); 439 } 440 441 if (instr->flags & IR3_INSTR_S2EN) { 442 if (src2) { 443 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 444 cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 445 } 446 if (src3) { 447 iassert(src3->flags & IR3_REG_HALF); 448 cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); 449 } 450 iassert(!(instr->cat5.samp | instr->cat5.tex)); 451 } else { 452 iassert(!src3); 453 if (src2) { 454 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 455 cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 456 } 457 cat5->norm.samp = instr->cat5.samp; 458 cat5->norm.tex = instr->cat5.tex; 459 } 460 461 cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 462 cat5->wrmask = dst->wrmask; 463 cat5->type = instr->cat5.type; 464 cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); 465 cat5->is_a = !!(instr->flags & IR3_INSTR_A); 466 cat5->is_s = !!(instr->flags & IR3_INSTR_S); 467 cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); 468 cat5->is_o = !!(instr->flags & IR3_INSTR_O); 469 cat5->is_p = !!(instr->flags & IR3_INSTR_P); 470 cat5->opc = instr->opc; 471 cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 472 cat5->sync = !!(instr->flags & IR3_INSTR_SY); 473 cat5->opc_cat = 5; 474 475 return 0; 476 } 477 478 static int emit_cat6(struct ir3_instruction *instr, void *ptr, 479 struct ir3_info *info) 480 { 481 struct ir3_register *dst, *src1, *src2; 482 instr_cat6_t *cat6 = ptr; 483 484 /* the "dst" for a store instruction is (from the perspective 485 * of data flow in the shader, ie. register use/def, etc) in 486 * fact a register that is read by the instruction, rather 487 * than written: 488 */ 489 if (is_store(instr)) { 490 iassert(instr->regs_count >= 3); 491 492 dst = instr->regs[1]; 493 src1 = instr->regs[2]; 494 src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; 495 } else { 496 iassert(instr->regs_count >= 2); 497 498 dst = instr->regs[0]; 499 src1 = instr->regs[1]; 500 src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; 501 } 502 503 504 /* TODO we need a more comprehensive list about which instructions 505 * can be encoded which way. Or possibly use IR3_INSTR_0 flag to 506 * indicate to use the src_off encoding even if offset is zero 507 * (but then what to do about dst_off?) 508 */ 509 if (instr->cat6.src_offset || (instr->opc == OPC_LDG)) { 510 instr_cat6a_t *cat6a = ptr; 511 512 cat6->src_off = true; 513 514 cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 515 cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); 516 if (src2) { 517 cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 518 cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); 519 } 520 cat6a->off = instr->cat6.src_offset; 521 } else { 522 instr_cat6b_t *cat6b = ptr; 523 524 cat6->src_off = false; 525 526 cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 527 cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); 528 if (src2) { 529 cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 530 cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); 531 } 532 } 533 534 if (instr->cat6.dst_offset || (instr->opc == OPC_STG)) { 535 instr_cat6c_t *cat6c = ptr; 536 cat6->dst_off = true; 537 cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 538 cat6c->off = instr->cat6.dst_offset; 539 } else { 540 instr_cat6d_t *cat6d = ptr; 541 cat6->dst_off = false; 542 cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 543 } 544 545 cat6->type = instr->cat6.type; 546 cat6->opc = instr->opc; 547 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 548 cat6->sync = !!(instr->flags & IR3_INSTR_SY); 549 cat6->g = !!(instr->flags & IR3_INSTR_G); 550 cat6->opc_cat = 6; 551 552 return 0; 553 } 554 555 static int (*emit[])(struct ir3_instruction *instr, void *ptr, 556 struct ir3_info *info) = { 557 emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, 558 }; 559 560 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, 561 uint32_t gpu_id) 562 { 563 uint32_t *ptr, *dwords; 564 565 info->gpu_id = gpu_id; 566 info->max_reg = -1; 567 info->max_half_reg = -1; 568 info->max_const = -1; 569 info->instrs_count = 0; 570 info->sizedwords = 0; 571 572 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 573 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 574 info->sizedwords += 2; 575 } 576 } 577 578 /* need an integer number of instruction "groups" (sets of 16 579 * instructions on a4xx or sets of 4 instructions on a3xx), 580 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) 581 */ 582 if (gpu_id >= 400) { 583 info->sizedwords = align(info->sizedwords, 16 * 2); 584 } else { 585 info->sizedwords = align(info->sizedwords, 4 * 2); 586 } 587 588 ptr = dwords = calloc(4, info->sizedwords); 589 590 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 591 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 592 int ret = emit[opc_cat(instr->opc)](instr, dwords, info); 593 if (ret) 594 goto fail; 595 info->instrs_count += 1 + instr->repeat; 596 dwords += 2; 597 } 598 } 599 600 return ptr; 601 602 fail: 603 free(ptr); 604 return NULL; 605 } 606 607 static struct ir3_register * reg_create(struct ir3 *shader, 608 int num, int flags) 609 { 610 struct ir3_register *reg = 611 ir3_alloc(shader, sizeof(struct ir3_register)); 612 reg->wrmask = 1; 613 reg->flags = flags; 614 reg->num = num; 615 return reg; 616 } 617 618 static void insert_instr(struct ir3_block *block, 619 struct ir3_instruction *instr) 620 { 621 struct ir3 *shader = block->shader; 622 #ifdef DEBUG 623 static uint32_t serialno = 0; 624 instr->serialno = ++serialno; 625 #endif 626 list_addtail(&instr->node, &block->instr_list); 627 628 if (is_input(instr)) 629 array_insert(shader->baryfs, instr); 630 } 631 632 struct ir3_block * ir3_block_create(struct ir3 *shader) 633 { 634 struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); 635 #ifdef DEBUG 636 static uint32_t serialno = 0; 637 block->serialno = ++serialno; 638 #endif 639 block->shader = shader; 640 list_inithead(&block->node); 641 list_inithead(&block->instr_list); 642 return block; 643 } 644 645 static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) 646 { 647 struct ir3_instruction *instr; 648 unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); 649 char *ptr = ir3_alloc(block->shader, sz); 650 651 instr = (struct ir3_instruction *)ptr; 652 ptr += sizeof(*instr); 653 instr->regs = (struct ir3_register **)ptr; 654 655 #ifdef DEBUG 656 instr->regs_max = nreg; 657 #endif 658 659 return instr; 660 } 661 662 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, 663 opc_t opc, int nreg) 664 { 665 struct ir3_instruction *instr = instr_create(block, nreg); 666 instr->block = block; 667 instr->opc = opc; 668 insert_instr(block, instr); 669 return instr; 670 } 671 672 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) 673 { 674 /* NOTE: we could be slightly more clever, at least for non-meta, 675 * and choose # of regs based on category. 676 */ 677 return ir3_instr_create2(block, opc, 4); 678 } 679 680 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) 681 { 682 struct ir3_instruction *new_instr = instr_create(instr->block, 683 instr->regs_count); 684 struct ir3_register **regs; 685 unsigned i; 686 687 regs = new_instr->regs; 688 *new_instr = *instr; 689 new_instr->regs = regs; 690 691 insert_instr(instr->block, new_instr); 692 693 /* clone registers: */ 694 new_instr->regs_count = 0; 695 for (i = 0; i < instr->regs_count; i++) { 696 struct ir3_register *reg = instr->regs[i]; 697 struct ir3_register *new_reg = 698 ir3_reg_create(new_instr, reg->num, reg->flags); 699 *new_reg = *reg; 700 } 701 702 return new_instr; 703 } 704 705 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, 706 int num, int flags) 707 { 708 struct ir3 *shader = instr->block->shader; 709 struct ir3_register *reg = reg_create(shader, num, flags); 710 #ifdef DEBUG 711 debug_assert(instr->regs_count < instr->regs_max); 712 #endif 713 instr->regs[instr->regs_count++] = reg; 714 return reg; 715 } 716 717 struct ir3_register * ir3_reg_clone(struct ir3 *shader, 718 struct ir3_register *reg) 719 { 720 struct ir3_register *new_reg = reg_create(shader, 0, 0); 721 *new_reg = *reg; 722 return new_reg; 723 } 724 725 void 726 ir3_instr_set_address(struct ir3_instruction *instr, 727 struct ir3_instruction *addr) 728 { 729 if (instr->address != addr) { 730 struct ir3 *ir = instr->block->shader; 731 instr->address = addr; 732 array_insert(ir->indirects, instr); 733 } 734 } 735 736 void 737 ir3_block_clear_mark(struct ir3_block *block) 738 { 739 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) 740 instr->flags &= ~IR3_INSTR_MARK; 741 } 742 743 void 744 ir3_clear_mark(struct ir3 *ir) 745 { 746 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 747 ir3_block_clear_mark(block); 748 } 749 } 750 751 /* note: this will destroy instr->depth, don't do it until after sched! */ 752 unsigned 753 ir3_count_instructions(struct ir3 *ir) 754 { 755 unsigned cnt = 0; 756 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 757 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 758 instr->ip = cnt++; 759 } 760 block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; 761 block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; 762 } 763 return cnt; 764 } 765 766 struct ir3_array * 767 ir3_lookup_array(struct ir3 *ir, unsigned id) 768 { 769 list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) 770 if (arr->id == id) 771 return arr; 772 return NULL; 773 } 774