1 /* 2 * Copyright (c) 2012 Rob Clark <robdclark (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24 #include "ir3.h" 25 26 #include <stdlib.h> 27 #include <stdio.h> 28 #include <string.h> 29 #include <assert.h> 30 #include <stdbool.h> 31 #include <errno.h> 32 33 #include "util/ralloc.h" 34 35 #include "freedreno_util.h" 36 #include "instr-a3xx.h" 37 38 /* simple allocator to carve allocations out of an up-front allocated heap, 39 * so that we can free everything easily in one shot. 40 */ 41 void * ir3_alloc(struct ir3 *shader, int sz) 42 { 43 return rzalloc_size(shader, sz); /* TODO: don't use rzalloc */ 44 } 45 46 struct ir3 * ir3_create(struct ir3_compiler *compiler, 47 unsigned nin, unsigned nout) 48 { 49 struct ir3 *shader = rzalloc(compiler, struct ir3); 50 51 shader->compiler = compiler; 52 shader->ninputs = nin; 53 shader->inputs = ir3_alloc(shader, sizeof(shader->inputs[0]) * nin); 54 55 shader->noutputs = nout; 56 shader->outputs = ir3_alloc(shader, sizeof(shader->outputs[0]) * nout); 57 58 list_inithead(&shader->block_list); 59 list_inithead(&shader->array_list); 60 61 return shader; 62 } 63 64 void ir3_destroy(struct ir3 *shader) 65 { 66 ralloc_free(shader); 67 } 68 69 #define iassert(cond) do { \ 70 if (!(cond)) { \ 71 assert(cond); \ 72 return -1; \ 73 } } while (0) 74 75 static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, 76 uint32_t repeat, uint32_t valid_flags) 77 { 78 reg_t val = { .dummy32 = 0 }; 79 80 if (reg->flags & ~valid_flags) { 81 debug_printf("INVALID FLAGS: %x vs %x\n", 82 reg->flags, valid_flags); 83 } 84 85 if (!(reg->flags & IR3_REG_R)) 86 repeat = 0; 87 88 if (reg->flags & IR3_REG_IMMED) { 89 val.iim_val = reg->iim_val; 90 } else { 91 unsigned components; 92 int16_t max; 93 94 if (reg->flags & IR3_REG_RELATIV) { 95 components = reg->size; 96 val.idummy10 = reg->array.offset; 97 max = (reg->array.offset + repeat + components - 1) >> 2; 98 } else { 99 components = util_last_bit(reg->wrmask); 100 val.comp = reg->num & 0x3; 101 val.num = reg->num >> 2; 102 max = (reg->num + repeat + components - 1) >> 2; 103 } 104 105 if (reg->flags & IR3_REG_CONST) { 106 info->max_const = MAX2(info->max_const, max); 107 } else if (val.num == 63) { 108 /* ignore writes to dummy register r63.x */ 109 } else if (max < 48) { 110 if (reg->flags & IR3_REG_HALF) { 111 info->max_half_reg = MAX2(info->max_half_reg, max); 112 } else { 113 info->max_reg = MAX2(info->max_reg, max); 114 } 115 } 116 } 117 118 return val.dummy32; 119 } 120 121 static int emit_cat0(struct ir3_instruction *instr, void *ptr, 122 struct ir3_info *info) 123 { 124 instr_cat0_t *cat0 = ptr; 125 126 if (info->gpu_id >= 500) { 127 cat0->a5xx.immed = instr->cat0.immed; 128 } else if (info->gpu_id >= 400) { 129 cat0->a4xx.immed = instr->cat0.immed; 130 } else { 131 cat0->a3xx.immed = instr->cat0.immed; 132 } 133 cat0->repeat = instr->repeat; 134 cat0->ss = !!(instr->flags & IR3_INSTR_SS); 135 cat0->inv = instr->cat0.inv; 136 cat0->comp = instr->cat0.comp; 137 cat0->opc = instr->opc; 138 cat0->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 139 cat0->sync = !!(instr->flags & IR3_INSTR_SY); 140 cat0->opc_cat = 0; 141 142 return 0; 143 } 144 145 static uint32_t type_flags(type_t type) 146 { 147 return (type_size(type) == 32) ? 0 : IR3_REG_HALF; 148 } 149 150 static int emit_cat1(struct ir3_instruction *instr, void *ptr, 151 struct ir3_info *info) 152 { 153 struct ir3_register *dst = instr->regs[0]; 154 struct ir3_register *src = instr->regs[1]; 155 instr_cat1_t *cat1 = ptr; 156 157 iassert(instr->regs_count == 2); 158 iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF)); 159 iassert((src->flags & IR3_REG_IMMED) || 160 !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF)); 161 162 if (src->flags & IR3_REG_IMMED) { 163 cat1->iim_val = src->iim_val; 164 cat1->src_im = 1; 165 } else if (src->flags & IR3_REG_RELATIV) { 166 cat1->off = reg(src, info, instr->repeat, 167 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF | IR3_REG_RELATIV); 168 cat1->src_rel = 1; 169 cat1->src_rel_c = !!(src->flags & IR3_REG_CONST); 170 } else { 171 cat1->src = reg(src, info, instr->repeat, 172 IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF); 173 cat1->src_c = !!(src->flags & IR3_REG_CONST); 174 } 175 176 cat1->dst = reg(dst, info, instr->repeat, 177 IR3_REG_RELATIV | IR3_REG_EVEN | 178 IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF); 179 cat1->repeat = instr->repeat; 180 cat1->src_r = !!(src->flags & IR3_REG_R); 181 cat1->ss = !!(instr->flags & IR3_INSTR_SS); 182 cat1->ul = !!(instr->flags & IR3_INSTR_UL); 183 cat1->dst_type = instr->cat1.dst_type; 184 cat1->dst_rel = !!(dst->flags & IR3_REG_RELATIV); 185 cat1->src_type = instr->cat1.src_type; 186 cat1->even = !!(dst->flags & IR3_REG_EVEN); 187 cat1->pos_inf = !!(dst->flags & IR3_REG_POS_INF); 188 cat1->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 189 cat1->sync = !!(instr->flags & IR3_INSTR_SY); 190 cat1->opc_cat = 1; 191 192 return 0; 193 } 194 195 static int emit_cat2(struct ir3_instruction *instr, void *ptr, 196 struct ir3_info *info) 197 { 198 struct ir3_register *dst = instr->regs[0]; 199 struct ir3_register *src1 = instr->regs[1]; 200 struct ir3_register *src2 = instr->regs[2]; 201 instr_cat2_t *cat2 = ptr; 202 unsigned absneg = ir3_cat2_absneg(instr->opc); 203 204 iassert((instr->regs_count == 2) || (instr->regs_count == 3)); 205 206 if (src1->flags & IR3_REG_RELATIV) { 207 iassert(src1->array.offset < (1 << 10)); 208 cat2->rel1.src1 = reg(src1, info, instr->repeat, 209 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 210 IR3_REG_HALF | absneg); 211 cat2->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 212 cat2->rel1.src1_rel = 1; 213 } else if (src1->flags & IR3_REG_CONST) { 214 iassert(src1->num < (1 << 12)); 215 cat2->c1.src1 = reg(src1, info, instr->repeat, 216 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 217 cat2->c1.src1_c = 1; 218 } else { 219 iassert(src1->num < (1 << 11)); 220 cat2->src1 = reg(src1, info, instr->repeat, 221 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 222 absneg); 223 } 224 cat2->src1_im = !!(src1->flags & IR3_REG_IMMED); 225 cat2->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 226 cat2->src1_abs = !!(src1->flags & (IR3_REG_FABS | IR3_REG_SABS)); 227 cat2->src1_r = !!(src1->flags & IR3_REG_R); 228 229 if (src2) { 230 iassert((src2->flags & IR3_REG_IMMED) || 231 !((src1->flags ^ src2->flags) & IR3_REG_HALF)); 232 233 if (src2->flags & IR3_REG_RELATIV) { 234 iassert(src2->array.offset < (1 << 10)); 235 cat2->rel2.src2 = reg(src2, info, instr->repeat, 236 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 237 IR3_REG_HALF | absneg); 238 cat2->rel2.src2_c = !!(src2->flags & IR3_REG_CONST); 239 cat2->rel2.src2_rel = 1; 240 } else if (src2->flags & IR3_REG_CONST) { 241 iassert(src2->num < (1 << 12)); 242 cat2->c2.src2 = reg(src2, info, instr->repeat, 243 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 244 cat2->c2.src2_c = 1; 245 } else { 246 iassert(src2->num < (1 << 11)); 247 cat2->src2 = reg(src2, info, instr->repeat, 248 IR3_REG_IMMED | IR3_REG_R | IR3_REG_HALF | 249 absneg); 250 } 251 252 cat2->src2_im = !!(src2->flags & IR3_REG_IMMED); 253 cat2->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 254 cat2->src2_abs = !!(src2->flags & (IR3_REG_FABS | IR3_REG_SABS)); 255 cat2->src2_r = !!(src2->flags & IR3_REG_R); 256 } 257 258 cat2->dst = reg(dst, info, instr->repeat, 259 IR3_REG_R | IR3_REG_EI | IR3_REG_HALF); 260 cat2->repeat = instr->repeat; 261 cat2->ss = !!(instr->flags & IR3_INSTR_SS); 262 cat2->ul = !!(instr->flags & IR3_INSTR_UL); 263 cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF); 264 cat2->ei = !!(dst->flags & IR3_REG_EI); 265 cat2->cond = instr->cat2.condition; 266 cat2->full = ! (src1->flags & IR3_REG_HALF); 267 cat2->opc = instr->opc; 268 cat2->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 269 cat2->sync = !!(instr->flags & IR3_INSTR_SY); 270 cat2->opc_cat = 2; 271 272 return 0; 273 } 274 275 static int emit_cat3(struct ir3_instruction *instr, void *ptr, 276 struct ir3_info *info) 277 { 278 struct ir3_register *dst = instr->regs[0]; 279 struct ir3_register *src1 = instr->regs[1]; 280 struct ir3_register *src2 = instr->regs[2]; 281 struct ir3_register *src3 = instr->regs[3]; 282 unsigned absneg = ir3_cat3_absneg(instr->opc); 283 instr_cat3_t *cat3 = ptr; 284 uint32_t src_flags = 0; 285 286 switch (instr->opc) { 287 case OPC_MAD_F16: 288 case OPC_MAD_U16: 289 case OPC_MAD_S16: 290 case OPC_SEL_B16: 291 case OPC_SEL_S16: 292 case OPC_SEL_F16: 293 case OPC_SAD_S16: 294 case OPC_SAD_S32: // really?? 295 src_flags |= IR3_REG_HALF; 296 break; 297 default: 298 break; 299 } 300 301 iassert(instr->regs_count == 4); 302 iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF)); 303 iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF)); 304 iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF)); 305 306 if (src1->flags & IR3_REG_RELATIV) { 307 iassert(src1->array.offset < (1 << 10)); 308 cat3->rel1.src1 = reg(src1, info, instr->repeat, 309 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 310 IR3_REG_HALF | absneg); 311 cat3->rel1.src1_c = !!(src1->flags & IR3_REG_CONST); 312 cat3->rel1.src1_rel = 1; 313 } else if (src1->flags & IR3_REG_CONST) { 314 iassert(src1->num < (1 << 12)); 315 cat3->c1.src1 = reg(src1, info, instr->repeat, 316 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 317 cat3->c1.src1_c = 1; 318 } else { 319 iassert(src1->num < (1 << 11)); 320 cat3->src1 = reg(src1, info, instr->repeat, 321 IR3_REG_R | IR3_REG_HALF | absneg); 322 } 323 324 cat3->src1_neg = !!(src1->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 325 cat3->src1_r = !!(src1->flags & IR3_REG_R); 326 327 cat3->src2 = reg(src2, info, instr->repeat, 328 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF | absneg); 329 cat3->src2_c = !!(src2->flags & IR3_REG_CONST); 330 cat3->src2_neg = !!(src2->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 331 cat3->src2_r = !!(src2->flags & IR3_REG_R); 332 333 334 if (src3->flags & IR3_REG_RELATIV) { 335 iassert(src3->array.offset < (1 << 10)); 336 cat3->rel2.src3 = reg(src3, info, instr->repeat, 337 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_R | 338 IR3_REG_HALF | absneg); 339 cat3->rel2.src3_c = !!(src3->flags & IR3_REG_CONST); 340 cat3->rel2.src3_rel = 1; 341 } else if (src3->flags & IR3_REG_CONST) { 342 iassert(src3->num < (1 << 12)); 343 cat3->c2.src3 = reg(src3, info, instr->repeat, 344 IR3_REG_CONST | IR3_REG_R | IR3_REG_HALF); 345 cat3->c2.src3_c = 1; 346 } else { 347 iassert(src3->num < (1 << 11)); 348 cat3->src3 = reg(src3, info, instr->repeat, 349 IR3_REG_R | IR3_REG_HALF | absneg); 350 } 351 352 cat3->src3_neg = !!(src3->flags & (IR3_REG_FNEG | IR3_REG_SNEG | IR3_REG_BNOT)); 353 cat3->src3_r = !!(src3->flags & IR3_REG_R); 354 355 cat3->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 356 cat3->repeat = instr->repeat; 357 cat3->ss = !!(instr->flags & IR3_INSTR_SS); 358 cat3->ul = !!(instr->flags & IR3_INSTR_UL); 359 cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF); 360 cat3->opc = instr->opc; 361 cat3->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 362 cat3->sync = !!(instr->flags & IR3_INSTR_SY); 363 cat3->opc_cat = 3; 364 365 return 0; 366 } 367 368 static int emit_cat4(struct ir3_instruction *instr, void *ptr, 369 struct ir3_info *info) 370 { 371 struct ir3_register *dst = instr->regs[0]; 372 struct ir3_register *src = instr->regs[1]; 373 instr_cat4_t *cat4 = ptr; 374 375 iassert(instr->regs_count == 2); 376 377 if (src->flags & IR3_REG_RELATIV) { 378 iassert(src->array.offset < (1 << 10)); 379 cat4->rel.src = reg(src, info, instr->repeat, 380 IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_FNEG | 381 IR3_REG_FABS | IR3_REG_R | IR3_REG_HALF); 382 cat4->rel.src_c = !!(src->flags & IR3_REG_CONST); 383 cat4->rel.src_rel = 1; 384 } else if (src->flags & IR3_REG_CONST) { 385 iassert(src->num < (1 << 12)); 386 cat4->c.src = reg(src, info, instr->repeat, 387 IR3_REG_CONST | IR3_REG_FNEG | IR3_REG_FABS | 388 IR3_REG_R | IR3_REG_HALF); 389 cat4->c.src_c = 1; 390 } else { 391 iassert(src->num < (1 << 11)); 392 cat4->src = reg(src, info, instr->repeat, 393 IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS | 394 IR3_REG_R | IR3_REG_HALF); 395 } 396 397 cat4->src_im = !!(src->flags & IR3_REG_IMMED); 398 cat4->src_neg = !!(src->flags & IR3_REG_FNEG); 399 cat4->src_abs = !!(src->flags & IR3_REG_FABS); 400 cat4->src_r = !!(src->flags & IR3_REG_R); 401 402 cat4->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 403 cat4->repeat = instr->repeat; 404 cat4->ss = !!(instr->flags & IR3_INSTR_SS); 405 cat4->ul = !!(instr->flags & IR3_INSTR_UL); 406 cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF); 407 cat4->full = ! (src->flags & IR3_REG_HALF); 408 cat4->opc = instr->opc; 409 cat4->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 410 cat4->sync = !!(instr->flags & IR3_INSTR_SY); 411 cat4->opc_cat = 4; 412 413 return 0; 414 } 415 416 static int emit_cat5(struct ir3_instruction *instr, void *ptr, 417 struct ir3_info *info) 418 { 419 struct ir3_register *dst = instr->regs[0]; 420 struct ir3_register *src1 = instr->regs[1]; 421 struct ir3_register *src2 = instr->regs[2]; 422 struct ir3_register *src3 = instr->regs[3]; 423 instr_cat5_t *cat5 = ptr; 424 425 iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF)); 426 427 assume(src1 || !src2); 428 assume(src2 || !src3); 429 430 if (src1) { 431 cat5->full = ! (src1->flags & IR3_REG_HALF); 432 cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF); 433 } 434 435 if (instr->flags & IR3_INSTR_S2EN) { 436 if (src2) { 437 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 438 cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 439 } 440 if (src3) { 441 iassert(src3->flags & IR3_REG_HALF); 442 cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF); 443 } 444 iassert(!(instr->cat5.samp | instr->cat5.tex)); 445 } else { 446 iassert(!src3); 447 if (src2) { 448 iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF)); 449 cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF); 450 } 451 cat5->norm.samp = instr->cat5.samp; 452 cat5->norm.tex = instr->cat5.tex; 453 } 454 455 cat5->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 456 cat5->wrmask = dst->wrmask; 457 cat5->type = instr->cat5.type; 458 cat5->is_3d = !!(instr->flags & IR3_INSTR_3D); 459 cat5->is_a = !!(instr->flags & IR3_INSTR_A); 460 cat5->is_s = !!(instr->flags & IR3_INSTR_S); 461 cat5->is_s2en = !!(instr->flags & IR3_INSTR_S2EN); 462 cat5->is_o = !!(instr->flags & IR3_INSTR_O); 463 cat5->is_p = !!(instr->flags & IR3_INSTR_P); 464 cat5->opc = instr->opc; 465 cat5->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 466 cat5->sync = !!(instr->flags & IR3_INSTR_SY); 467 cat5->opc_cat = 5; 468 469 return 0; 470 } 471 472 static int emit_cat6(struct ir3_instruction *instr, void *ptr, 473 struct ir3_info *info) 474 { 475 struct ir3_register *dst, *src1, *src2; 476 instr_cat6_t *cat6 = ptr; 477 478 cat6->type = instr->cat6.type; 479 cat6->opc = instr->opc; 480 cat6->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 481 cat6->sync = !!(instr->flags & IR3_INSTR_SY); 482 cat6->g = !!(instr->flags & IR3_INSTR_G); 483 cat6->opc_cat = 6; 484 485 /* the "dst" for a store instruction is (from the perspective 486 * of data flow in the shader, ie. register use/def, etc) in 487 * fact a register that is read by the instruction, rather 488 * than written: 489 */ 490 if (is_store(instr)) { 491 iassert(instr->regs_count >= 3); 492 493 dst = instr->regs[1]; 494 src1 = instr->regs[2]; 495 src2 = (instr->regs_count >= 4) ? instr->regs[3] : NULL; 496 } else { 497 iassert(instr->regs_count >= 2); 498 499 dst = instr->regs[0]; 500 src1 = instr->regs[1]; 501 src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL; 502 } 503 504 /* TODO we need a more comprehensive list about which instructions 505 * can be encoded which way. Or possibly use IR3_INSTR_0 flag to 506 * indicate to use the src_off encoding even if offset is zero 507 * (but then what to do about dst_off?) 508 */ 509 if (is_atomic(instr->opc)) { 510 instr_cat6ldgb_t *ldgb = ptr; 511 512 /* maybe these two bits both determine the instruction encoding? */ 513 cat6->src_off = false; 514 515 ldgb->d = instr->cat6.d - 1; 516 ldgb->typed = instr->cat6.typed; 517 ldgb->type_size = instr->cat6.iim_val - 1; 518 519 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 520 521 if (ldgb->g) { 522 struct ir3_register *src3 = instr->regs[3]; 523 struct ir3_register *src4 = instr->regs[4]; 524 525 /* first src is src_ssbo: */ 526 iassert(src1->flags & IR3_REG_IMMED); 527 ldgb->src_ssbo = src1->uim_val; 528 529 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 530 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); 531 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 532 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); 533 534 ldgb->src3 = reg(src4, info, instr->repeat, 0); 535 ldgb->pad0 = 0x1; 536 ldgb->pad3 = 0x1; 537 } else { 538 ldgb->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 539 ldgb->src1_im = !!(src1->flags & IR3_REG_IMMED); 540 ldgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 541 ldgb->src2_im = !!(src2->flags & IR3_REG_IMMED); 542 ldgb->pad0 = 0x1; 543 ldgb->pad3 = 0x0; 544 } 545 546 return 0; 547 } else if (instr->opc == OPC_LDGB) { 548 struct ir3_register *src3 = instr->regs[3]; 549 instr_cat6ldgb_t *ldgb = ptr; 550 551 /* maybe these two bits both determine the instruction encoding? */ 552 cat6->src_off = false; 553 554 ldgb->d = instr->cat6.d - 1; 555 ldgb->typed = instr->cat6.typed; 556 ldgb->type_size = instr->cat6.iim_val - 1; 557 558 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 559 560 /* first src is src_ssbo: */ 561 iassert(src1->flags & IR3_REG_IMMED); 562 ldgb->src_ssbo = src1->uim_val; 563 564 /* then next two are src1/src2: */ 565 ldgb->src1 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 566 ldgb->src1_im = !!(src2->flags & IR3_REG_IMMED); 567 ldgb->src2 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 568 ldgb->src2_im = !!(src3->flags & IR3_REG_IMMED); 569 570 ldgb->pad0 = 0x0; 571 ldgb->pad3 = 0x1; 572 573 return 0; 574 } else if (instr->opc == OPC_RESINFO) { 575 instr_cat6ldgb_t *ldgb = ptr; 576 577 ldgb->d = instr->cat6.d - 1; 578 579 ldgb->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 580 581 /* first src is src_ssbo: */ 582 iassert(src1->flags & IR3_REG_IMMED); 583 ldgb->src_ssbo = src1->uim_val; 584 585 return 0; 586 } else if ((instr->opc == OPC_STGB) || (instr->opc == OPC_STIB)) { 587 struct ir3_register *src3 = instr->regs[4]; 588 instr_cat6stgb_t *stgb = ptr; 589 590 /* maybe these two bits both determine the instruction encoding? */ 591 cat6->src_off = true; 592 stgb->pad3 = 0x2; 593 594 stgb->d = instr->cat6.d - 1; 595 stgb->typed = instr->cat6.typed; 596 stgb->type_size = instr->cat6.iim_val - 1; 597 598 /* first src is dst_ssbo: */ 599 iassert(dst->flags & IR3_REG_IMMED); 600 stgb->dst_ssbo = dst->uim_val; 601 602 /* then src1/src2/src3: */ 603 stgb->src1 = reg(src1, info, instr->repeat, 0); 604 stgb->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 605 stgb->src2_im = !!(src2->flags & IR3_REG_IMMED); 606 stgb->src3 = reg(src3, info, instr->repeat, IR3_REG_IMMED); 607 stgb->src3_im = !!(src3->flags & IR3_REG_IMMED); 608 609 return 0; 610 } else if (instr->cat6.src_offset || (instr->opc == OPC_LDG) || 611 (instr->opc == OPC_LDL)) { 612 instr_cat6a_t *cat6a = ptr; 613 614 cat6->src_off = true; 615 616 cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 617 cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED); 618 if (src2) { 619 cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 620 cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED); 621 } 622 cat6a->off = instr->cat6.src_offset; 623 } else { 624 instr_cat6b_t *cat6b = ptr; 625 626 cat6->src_off = false; 627 628 cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED); 629 cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED); 630 if (src2) { 631 cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED); 632 cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED); 633 } 634 } 635 636 if (instr->cat6.dst_offset || (instr->opc == OPC_STG) || 637 (instr->opc == OPC_STL)) { 638 instr_cat6c_t *cat6c = ptr; 639 cat6->dst_off = true; 640 cat6c->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 641 cat6c->off = instr->cat6.dst_offset; 642 } else { 643 instr_cat6d_t *cat6d = ptr; 644 cat6->dst_off = false; 645 cat6d->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF); 646 } 647 648 return 0; 649 } 650 651 static int emit_cat7(struct ir3_instruction *instr, void *ptr, 652 struct ir3_info *info) 653 { 654 instr_cat7_t *cat7 = ptr; 655 656 cat7->ss = !!(instr->flags & IR3_INSTR_SS); 657 cat7->w = instr->cat7.w; 658 cat7->r = instr->cat7.r; 659 cat7->l = instr->cat7.l; 660 cat7->g = instr->cat7.g; 661 cat7->opc = instr->opc; 662 cat7->jmp_tgt = !!(instr->flags & IR3_INSTR_JP); 663 cat7->sync = !!(instr->flags & IR3_INSTR_SY); 664 cat7->opc_cat = 7; 665 666 return 0; 667 } 668 669 static int (*emit[])(struct ir3_instruction *instr, void *ptr, 670 struct ir3_info *info) = { 671 emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, 672 emit_cat7, 673 }; 674 675 void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, 676 uint32_t gpu_id) 677 { 678 uint32_t *ptr, *dwords; 679 680 info->gpu_id = gpu_id; 681 info->max_reg = -1; 682 info->max_half_reg = -1; 683 info->max_const = -1; 684 info->instrs_count = 0; 685 info->sizedwords = 0; 686 687 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 688 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 689 info->sizedwords += 2; 690 } 691 } 692 693 /* need an integer number of instruction "groups" (sets of 16 694 * instructions on a4xx or sets of 4 instructions on a3xx), 695 * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits) 696 */ 697 if (gpu_id >= 400) { 698 info->sizedwords = align(info->sizedwords, 16 * 2); 699 } else { 700 info->sizedwords = align(info->sizedwords, 4 * 2); 701 } 702 703 ptr = dwords = calloc(4, info->sizedwords); 704 705 list_for_each_entry (struct ir3_block, block, &shader->block_list, node) { 706 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 707 int ret = emit[opc_cat(instr->opc)](instr, dwords, info); 708 if (ret) 709 goto fail; 710 info->instrs_count += 1 + instr->repeat; 711 dwords += 2; 712 } 713 } 714 715 return ptr; 716 717 fail: 718 free(ptr); 719 return NULL; 720 } 721 722 static struct ir3_register * reg_create(struct ir3 *shader, 723 int num, int flags) 724 { 725 struct ir3_register *reg = 726 ir3_alloc(shader, sizeof(struct ir3_register)); 727 reg->wrmask = 1; 728 reg->flags = flags; 729 reg->num = num; 730 return reg; 731 } 732 733 static void insert_instr(struct ir3_block *block, 734 struct ir3_instruction *instr) 735 { 736 struct ir3 *shader = block->shader; 737 #ifdef DEBUG 738 static uint32_t serialno = 0; 739 instr->serialno = ++serialno; 740 #endif 741 list_addtail(&instr->node, &block->instr_list); 742 743 if (is_input(instr)) 744 array_insert(shader, shader->baryfs, instr); 745 } 746 747 struct ir3_block * ir3_block_create(struct ir3 *shader) 748 { 749 struct ir3_block *block = ir3_alloc(shader, sizeof(*block)); 750 #ifdef DEBUG 751 static uint32_t serialno = 0; 752 block->serialno = ++serialno; 753 #endif 754 block->shader = shader; 755 list_inithead(&block->node); 756 list_inithead(&block->instr_list); 757 return block; 758 } 759 760 static struct ir3_instruction *instr_create(struct ir3_block *block, int nreg) 761 { 762 struct ir3_instruction *instr; 763 unsigned sz = sizeof(*instr) + (nreg * sizeof(instr->regs[0])); 764 char *ptr = ir3_alloc(block->shader, sz); 765 766 instr = (struct ir3_instruction *)ptr; 767 ptr += sizeof(*instr); 768 instr->regs = (struct ir3_register **)ptr; 769 770 #ifdef DEBUG 771 instr->regs_max = nreg; 772 #endif 773 774 return instr; 775 } 776 777 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block, 778 opc_t opc, int nreg) 779 { 780 struct ir3_instruction *instr = instr_create(block, nreg); 781 instr->block = block; 782 instr->opc = opc; 783 insert_instr(block, instr); 784 return instr; 785 } 786 787 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc) 788 { 789 /* NOTE: we could be slightly more clever, at least for non-meta, 790 * and choose # of regs based on category. 791 */ 792 return ir3_instr_create2(block, opc, 4); 793 } 794 795 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr) 796 { 797 struct ir3_instruction *new_instr = instr_create(instr->block, 798 instr->regs_count); 799 struct ir3_register **regs; 800 unsigned i; 801 802 regs = new_instr->regs; 803 *new_instr = *instr; 804 new_instr->regs = regs; 805 806 insert_instr(instr->block, new_instr); 807 808 /* clone registers: */ 809 new_instr->regs_count = 0; 810 for (i = 0; i < instr->regs_count; i++) { 811 struct ir3_register *reg = instr->regs[i]; 812 struct ir3_register *new_reg = 813 ir3_reg_create(new_instr, reg->num, reg->flags); 814 *new_reg = *reg; 815 } 816 817 return new_instr; 818 } 819 820 /* Add a false dependency to instruction, to ensure it is scheduled first: */ 821 void ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep) 822 { 823 array_insert(instr, instr->deps, dep); 824 } 825 826 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr, 827 int num, int flags) 828 { 829 struct ir3 *shader = instr->block->shader; 830 struct ir3_register *reg = reg_create(shader, num, flags); 831 #ifdef DEBUG 832 debug_assert(instr->regs_count < instr->regs_max); 833 #endif 834 instr->regs[instr->regs_count++] = reg; 835 return reg; 836 } 837 838 struct ir3_register * ir3_reg_clone(struct ir3 *shader, 839 struct ir3_register *reg) 840 { 841 struct ir3_register *new_reg = reg_create(shader, 0, 0); 842 *new_reg = *reg; 843 return new_reg; 844 } 845 846 void 847 ir3_instr_set_address(struct ir3_instruction *instr, 848 struct ir3_instruction *addr) 849 { 850 if (instr->address != addr) { 851 struct ir3 *ir = instr->block->shader; 852 instr->address = addr; 853 array_insert(ir, ir->indirects, instr); 854 } 855 } 856 857 void 858 ir3_block_clear_mark(struct ir3_block *block) 859 { 860 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) 861 instr->flags &= ~IR3_INSTR_MARK; 862 } 863 864 void 865 ir3_clear_mark(struct ir3 *ir) 866 { 867 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 868 ir3_block_clear_mark(block); 869 } 870 } 871 872 /* note: this will destroy instr->depth, don't do it until after sched! */ 873 unsigned 874 ir3_count_instructions(struct ir3 *ir) 875 { 876 unsigned cnt = 0; 877 list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { 878 list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) { 879 instr->ip = cnt++; 880 } 881 block->start_ip = list_first_entry(&block->instr_list, struct ir3_instruction, node)->ip; 882 block->end_ip = list_last_entry(&block->instr_list, struct ir3_instruction, node)->ip; 883 } 884 return cnt; 885 } 886 887 struct ir3_array * 888 ir3_lookup_array(struct ir3 *ir, unsigned id) 889 { 890 list_for_each_entry (struct ir3_array, arr, &ir->array_list, node) 891 if (arr->id == id) 892 return arr; 893 return NULL; 894 } 895