/*
 * Copyright 2015 Intel Corporation
 * Copyright 2014-2015 Broadcom
 * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/nir/nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/glsl/list.h"
#include "main/imports.h"
#include "util/ralloc.h"

#include "prog_to_nir.h"
#include "prog_instruction.h"
#include "prog_parameter.h"
#include "prog_print.h"
#include "program.h"

/**
 * \file prog_to_nir.c
 *
 * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
 * intended to support ARB_vertex_program, ARB_fragment_program, and
 * fixed-function vertex processing.  Full GLSL support should use
 * glsl_to_nir instead.
 */

/* Per-translation state carried through the whole prog -> NIR conversion. */
struct ptn_compile {
   const struct gl_program *prog;   /* the Mesa IR program being translated */
   nir_builder build;               /* builder emitting into the new shader */
   bool error;                      /* set on unrecoverable failure (e.g. OOM) */

   /* Uniform vec4[] variable holding the program's parameter list, if any. */
   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];
   nir_variable *output_vars[VARYING_SLOT_MAX];
   /* Outputs are written through registers and copied to the real output
    * variables at the end of the shader (see ptn_add_output_stores()).
    */
   nir_register **output_regs;
   nir_register **temp_regs;

   nir_register *addr_reg;          /* the single ARB address register */
};

/* Builds an unsigned[4] swizzle array from four SWIZZLE_* component names. */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Replicates a single channel of src across a 1-component result. */
#define ptn_channel(b, src, ch) nir_swizzle(b, src, SWIZ(ch, ch, ch, ch), 1, true)

/**
 * Reads back the value just written to \p dest as a 4-component SSA def.
 *
 * Used by the saturate path in ptn_emit_instruction(): Mesa IR destinations
 * are registers here, so the written value can be re-read with an fmov.
 */
static nir_ssa_def *
ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
{
   nir_builder *b = &c->build;

   nir_alu_src src;
   memset(&src, 0, sizeof(src));

   if (dest->dest.is_ssa)
      src.src = nir_src_for_ssa(&dest->dest.ssa);
   else {
      assert(!dest->dest.reg.indirect);
      src.src = nir_src_for_reg(dest->dest.reg.reg);
      src.src.reg.base_offset = dest->dest.reg.base_offset;
   }

   /* Identity swizzle: read the channels straight through. */
   for (int i = 0; i < 4; i++)
      src.swizzle[i] = i;

   return nir_fmov_alu(b, src, 4);
}

/**
 * Translates a Mesa IR destination register into a NIR ALU destination,
 * mapping the register file to the matching pre-created nir_register.
 */
static nir_alu_dest
ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
{
   nir_alu_dest dest;

   memset(&dest, 0, sizeof(dest));

   switch (prog_dst->File) {
   case PROGRAM_TEMPORARY:
      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
      break;
   case PROGRAM_OUTPUT:
      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
      break;
   case PROGRAM_ADDRESS:
      /* ARB programs have a single address register. */
      assert(prog_dst->Index == 0);
      dest.dest.reg.reg = c->addr_reg;
      break;
   case PROGRAM_UNDEFINED:
      break;
   }

   dest.write_mask = prog_dst->WriteMask;
   /* Saturation is applied as a separate step in ptn_emit_instruction(). */
   dest.saturate = false;

   /* Relative addressing of destinations is not supported. */
   assert(!prog_dst->RelAddr);

   return dest;
}

/**
 * Translates a Mesa IR source register into a NIR SSA value, handling the
 * register file, (relatively addressed) constants, swizzles and negation.
 */
static nir_ssa_def *
ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
{
   nir_builder *b = &c->build;
   nir_alu_src src;

   memset(&src, 0, sizeof(src));

   switch (prog_src->File) {
   case PROGRAM_UNDEFINED:
      return
nir_imm_float(b, 0.0);
   case PROGRAM_TEMPORARY:
      assert(!prog_src->RelAddr && prog_src->Index >= 0);
      src.src.reg.reg = c->temp_regs[prog_src->Index];
      break;
   case PROGRAM_INPUT: {
      /* ARB_vertex_program doesn't allow relative addressing on vertex
       * attributes; ARB_fragment_program has no relative addressing at all.
       */
      assert(!prog_src->RelAddr);

      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);

      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
      load->num_components = 4;
      load->variables[0] = nir_deref_var_create(load, c->input_vars[prog_src->Index]);

      nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      src.src = nir_src_for_ssa(&load->dest.ssa);
      break;
   }
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT: {
      /* We actually want to look at the type in the Parameters list for this,
       * because it lets us upload constant builtin uniforms as actual
       * constants.
       */
      struct gl_program_parameter_list *plist = c->prog->Parameters;
      gl_register_file file = prog_src->RelAddr ? prog_src->File :
         plist->Parameters[prog_src->Index].Type;

      switch (file) {
      case PROGRAM_CONSTANT:
         /* Non-indirect constants can be inlined as immediates. */
         if ((c->prog->arb.IndirectRegisterFiles &
              (1 << PROGRAM_CONSTANT)) == 0) {
            float *v = (float *) plist->ParameterValues[prog_src->Index];
            src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
            break;
         }
         /* FALLTHROUGH */
      case PROGRAM_STATE_VAR: {
         assert(c->parameters != NULL);

         /* Load from the "parameters" uniform array, optionally indirected
          * through the address register.
          */
         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var);
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         load->num_components = 4;

         load->variables[0] = nir_deref_var_create(load, c->parameters);
         nir_deref_array *deref_arr =
            nir_deref_array_create(load->variables[0]);
         deref_arr->deref.type = glsl_vec4_type();
         load->variables[0]->deref.child = &deref_arr->deref;

         if (prog_src->RelAddr) {
            deref_arr->deref_array_type = nir_deref_array_type_indirect;

            nir_alu_src addr_src = { NIR_SRC_INIT };
            addr_src.src = nir_src_for_reg(c->addr_reg);
            nir_ssa_def *reladdr = nir_imov_alu(b, addr_src, 1);

            if (prog_src->Index < 0) {
               /* This is a negative offset which should be added to the address
                * register's value.
                */
               reladdr = nir_iadd(b, reladdr, nir_imm_int(b, prog_src->Index));

               deref_arr->base_offset = 0;
            } else {
               deref_arr->base_offset = prog_src->Index;
            }
            deref_arr->indirect = nir_src_for_ssa(reladdr);
         } else {
            deref_arr->deref_array_type = nir_deref_array_type_direct;
            deref_arr->base_offset = prog_src->Index;
         }

         nir_builder_instr_insert(b, &load->instr);

         src.src = nir_src_for_ssa(&load->dest.ssa);
         break;
      }
      default:
         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
                 _mesa_register_file_name(file), file);
         abort();
      }
      break;
   }
   default:
      fprintf(stderr, "unknown src register file: %s (%d)\n",
              _mesa_register_file_name(prog_src->File), prog_src->File);
      abort();
   }

   nir_ssa_def *def;
   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
       (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
      /* The simple non-SWZ case. */
      for (int i = 0; i < 4; i++)
         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);

      def = nir_fmov_alu(b, src, 4);

      if (prog_src->Negate)
         def = nir_fneg(b, def);
   } else {
      /* The SWZ instruction allows per-component zero/one swizzles, and also
       * per-component negation.
       */
      nir_ssa_def *chans[4];
      for (int i = 0; i < 4; i++) {
         int swizzle = GET_SWZ(prog_src->Swizzle, i);
         if (swizzle == SWIZZLE_ZERO) {
            chans[i] = nir_imm_float(b, 0.0);
         } else if (swizzle == SWIZZLE_ONE) {
            chans[i] = nir_imm_float(b, 1.0);
         } else {
            assert(swizzle != SWIZZLE_NIL);
            /* Extract a single swizzled channel with a scalar fmov. */
            nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, 32, NULL);
            mov->dest.write_mask = 0x1;
            mov->src[0] = src;
            mov->src[0].swizzle[0] = swizzle;
            nir_builder_instr_insert(b, &mov->instr);

            chans[i] = &mov->dest.dest.ssa;
         }

         if (prog_src->Negate & (1 << i))
            chans[i] = nir_fneg(b, chans[i]);
      }
      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
   }

   return def;
}

/**
 * Emits a single ALU instruction of the given NIR opcode, taking its source
 * count from nir_op_infos.
 */
static void
ptn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
   unsigned i;

   for (i = 0; i < num_srcs; i++)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
   nir_builder_instr_insert(b, &instr->instr);
}

/**
 * Moves \p def into \p dest, restricted to the channels selected by both
 * the destination's write mask and \p write_mask.  No-op when no channel
 * survives the intersection.
 */
static void
ptn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
                     nir_ssa_def *def, unsigned write_mask)
{
   if (!(dest.write_mask & write_mask))
      return;

   nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_fmov);
   if (!mov)
      return;

   mov->dest = dest;
   mov->dest.write_mask &= write_mask;
   mov->src[0].src = nir_src_for_ssa(def);
   /* Splat the last channel of a narrow def into the remaining slots so
    * all four swizzle entries are valid.
    */
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
   nir_builder_instr_insert(b, &mov->instr);
}

/* Moves def into dest over the full XYZW write mask. */
static void
ptn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
{
   ptn_move_dest_masked(b, dest, def, WRITEMASK_XYZW);
}

/* ARL - Address Register Load: dest = (int)floor(src). */
static void
ptn_arl(nir_builder *b, nir_alu_dest
dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_f2i32(b, nir_ffloor(b, src[0])));
}

/* EXP - Approximate Exponential Base 2
 *  dst.x = 2^{\lfloor src.x\rfloor}
 *  dst.y = src.x - \lfloor src.x\rfloor
 *  dst.z = 2^{src.x}
 *  dst.w = 1.0
 */
static void
ptn_exp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *srcx = ptn_channel(b, src[0], X);

   ptn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* LOG - Approximate Logarithm Base 2
 *  dst.x = \lfloor\log_2{|src.x|}\rfloor
 *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}}
 *  dst.z = \log_2{|src.x|}
 *  dst.w = 1.0
 */
static void
ptn_log(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);

   ptn_move_dest_masked(b, dest, floor_log2, WRITEMASK_X);
   ptn_move_dest_masked(b, dest,
                        nir_fmul(b, abs_srcx,
                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, log2, WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/* DST - Distance Vector
 *   dst.x = 1.0
 *   dst.y = src0.y \times src1.y
 *   dst.z = src0.z
 *   dst.w = src1.w
 */
static void
ptn_dst(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_fmov(b, src[1]), WRITEMASK_W);
}

/* LIT - Light Coefficients
 *  dst.x = 1.0
 *  dst.y = max(src.x, 0.0)
 *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
 *  dst.w = 1.0
 */
static void
ptn_lit(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_XW);

   ptn_move_dest_masked(b, dest, nir_fmax(b, ptn_channel(b, src[0], X),
                                          nir_imm_float(b, 0.0)), WRITEMASK_Y);

   if (dest.write_mask & WRITEMASK_Z) {
      nir_ssa_def *src0_y = ptn_channel(b, src[0], Y);
      /* Clamp the specular exponent to the ARB-specified [-128, 128]. */
      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
                                                 nir_imm_float(b, 128.0)),
                                     nir_imm_float(b, -128.0));
      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
                                  wclamp);

      /* Select 0 when src.x <= 0, using integer bcsel where available and
       * the float set-on compare otherwise.
       */
      nir_ssa_def *z;
      if (b->shader->options->native_integers) {
         z = nir_bcsel(b,
                       nir_fge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      } else {
         z = nir_fcsel(b,
                       nir_sge(b, nir_imm_float(b, 0.0), ptn_channel(b, src[0], X)),
                       nir_imm_float(b, 0.0),
                       pow);
      }

      ptn_move_dest_masked(b, dest, z, WRITEMASK_Z);
   }
}

/* SCS - Sine Cosine
 *   dst.x = \cos{src.x}
 *   dst.y = \sin{src.x}
 *   dst.z = 0.0
 *   dst.w = 1.0
 */
static void
ptn_scs(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest, nir_fcos(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_X);
   ptn_move_dest_masked(b, dest, nir_fsin(b, ptn_channel(b, src[0], X)),
                        WRITEMASK_Y);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), WRITEMASK_Z);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

/**
 * Emit SLT. For platforms with integers, prefer b2f(flt(...)).
 */
static void
ptn_slt(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_slt(b, src[0], src[1]));
   }
}

/**
 * Emit SGE. For platforms with integers, prefer b2f(fge(...)).
 */
static void
ptn_sge(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
   } else {
      ptn_move_dest(b, dest, nir_sge(b, src[0], src[1]));
   }
}

/* XPD - cross product of src0.xyz and src1.xyz; .w is set to 1.0. */
static void
ptn_xpd(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest_masked(b, dest,
                        nir_fsub(b,
                                 nir_fmul(b,
                                          nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3, true)),
                                 nir_fmul(b,
                                          nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3, true),
                                          nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3, true))),
                        WRITEMASK_XYZ);
   ptn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), WRITEMASK_W);
}

static void
ptn_dp2(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
}

static void
ptn_dp3(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
}

static void
ptn_dp4(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
}

static void
ptn_dph(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   ptn_move_dest(b, dest, nir_fdph(b, src[0], src[1]));
}

/* CMP - componentwise: dst = (src0 < 0.0) ? src1 : src2 */
static void
ptn_cmp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   if (b->shader->options->native_integers) {
      ptn_move_dest(b, dest, nir_bcsel(b,
                                       nir_flt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   } else {
      ptn_move_dest(b, dest, nir_fcsel(b,
                                       nir_slt(b, src[0], nir_imm_float(b, 0.0)),
                                       src[1], src[2]));
   }
}

static void
ptn_lrp(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src)
{
   /* Operand order swaps src2/src0 to match flrp's lerp-factor-last form. */
   ptn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
}

/* KIL - discard the fragment if any component of src is negative. */
static void
ptn_kil(nir_builder *b, nir_ssa_def **src)
{
   nir_ssa_def *cmp = b->shader->options->native_integers ?
      nir_bany_inequal4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_int(b, 0)) :
      nir_fany_nequal4(b, nir_slt(b, src[0], nir_imm_float(b, 0.0)), nir_imm_float(b, 0.0));

   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
   nir_builder_instr_insert(b, &discard->instr);
}

/**
 * Translates a Mesa IR TEX/TXB/TXD/TXL/TXP instruction to a nir_tex_instr.
 */
static void
ptn_tex(nir_builder *b, nir_alu_dest dest, nir_ssa_def **src,
        struct prog_instruction *prog_inst)
{
   nir_tex_instr *instr;
   nir_texop op;
   unsigned num_srcs;

   switch (prog_inst->Opcode) {
   case OPCODE_TEX:
      op = nir_texop_tex;
      num_srcs = 1;
      break;
   case OPCODE_TXB:
      op = nir_texop_txb;
      num_srcs = 2;
      break;
   case OPCODE_TXD:
      op = nir_texop_txd;
      num_srcs = 3;
      break;
   case OPCODE_TXL:
      op = nir_texop_txl;
      num_srcs = 2;
      break;
   case OPCODE_TXP:
      /* Projective lookup: regular tex plus a projector source. */
      op = nir_texop_tex;
      num_srcs = 2;
      break;
   default:
      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
      abort();
   }

   /* Shadow comparisons take one extra source. */
   if (prog_inst->TexShadow)
      num_srcs++;

   instr = nir_tex_instr_create(b->shader, num_srcs);
   instr->op = op;
   instr->dest_type = nir_type_float;
   instr->is_shadow = prog_inst->TexShadow;
   instr->texture_index = prog_inst->TexSrcUnit;
   instr->sampler_index = prog_inst->TexSrcUnit;

   switch (prog_inst->TexSrcTarget) {
   case TEXTURE_1D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
      break;
   case
TEXTURE_2D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
      break;
   case TEXTURE_3D_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
      break;
   case TEXTURE_CUBE_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
      break;
   case TEXTURE_RECT_INDEX:
      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
      break;
   default:
      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
      abort();
   }

   /* Derive the coordinate component count from the sampler dimension. */
   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_BUF:
      instr->coord_components = 1;
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
   case GLSL_SAMPLER_DIM_MS:
      instr->coord_components = 2;
      break;
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
      instr->coord_components = 3;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      unreachable("can't reach");
   }

   unsigned src_number = 0;

   instr->src[src_number].src =
      nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
                                  instr->coord_components, true));
   instr->src[src_number].src_type = nir_tex_src_coord;
   src_number++;

   /* Bias, LOD, and the projector all live in the .w channel of src0. */
   if (prog_inst->Opcode == OPCODE_TXP) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXB) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
   }

   if (prog_inst->Opcode == OPCODE_TXL) {
      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
      instr->src[src_number].src_type = nir_tex_src_lod;
      src_number++;
   }

   if (instr->is_shadow) {
      /* The comparator is the channel after the coordinate. */
      if (instr->coord_components < 3)
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
      else
         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));

      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ptn_move_dest(b, dest, &instr->dest.ssa);
}

/* Direct Mesa-IR-to-NIR ALU opcode mapping.  Zero entries are either
 * handled specially in ptn_emit_instruction() or unsupported.
 */
static const nir_op op_trans[MAX_OPCODE] = {
   [OPCODE_NOP] = 0,
   [OPCODE_ABS] = nir_op_fabs,
   [OPCODE_ADD] = nir_op_fadd,
   [OPCODE_ARL] = 0,
   [OPCODE_CMP] = 0,
   [OPCODE_COS] = 0,
   [OPCODE_DDX] = nir_op_fddx,
   [OPCODE_DDY] = nir_op_fddy,
   [OPCODE_DP2] = 0,
   [OPCODE_DP3] = 0,
   [OPCODE_DP4] = 0,
   [OPCODE_DPH] = 0,
   [OPCODE_DST] = 0,
   [OPCODE_END] = 0,
   [OPCODE_EX2] = 0,
   [OPCODE_EXP] = 0,
   [OPCODE_FLR] = nir_op_ffloor,
   [OPCODE_FRC] = nir_op_ffract,
   [OPCODE_LG2] = 0,
   [OPCODE_LIT] = 0,
   [OPCODE_LOG] = 0,
   [OPCODE_LRP] = 0,
   [OPCODE_MAD] = 0,
   [OPCODE_MAX] = nir_op_fmax,
   [OPCODE_MIN] = nir_op_fmin,
   [OPCODE_MOV] = nir_op_fmov,
   [OPCODE_MUL] = nir_op_fmul,
   [OPCODE_POW] = 0,
   [OPCODE_RCP] = 0,

   [OPCODE_RSQ] = 0,
   [OPCODE_SCS] = 0,
   [OPCODE_SGE] = 0,
   [OPCODE_SIN] = 0,
   [OPCODE_SLT] = 0,
   [OPCODE_SSG] = nir_op_fsign,
   [OPCODE_SUB] = nir_op_fsub,
   [OPCODE_SWZ] = 0,
   [OPCODE_TEX] = 0,
   [OPCODE_TRUNC] = nir_op_ftrunc,
   [OPCODE_TXB] = 0,
   [OPCODE_TXD] = 0,
   [OPCODE_TXL] = 0,
   [OPCODE_TXP] = 0,
   [OPCODE_XPD] = 0,
};

/**
 * Emits NIR for one Mesa IR instruction, dispatching to the helpers above
 * or to a direct ALU translation from op_trans.
 */
static void
ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
{
   nir_builder *b = &c->build;
   unsigned i;
   const unsigned op = prog_inst->Opcode;

   if (op == OPCODE_END)
      return;

   nir_ssa_def *src[3];
   for (i = 0; i < 3; i++) {
      src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
   }
   nir_alu_dest dest = ptn_get_dest(c,
&prog_inst->DstReg); 713 if (c->error) 714 return; 715 716 switch (op) { 717 case OPCODE_RSQ: 718 ptn_move_dest(b, dest, 719 nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)))); 720 break; 721 722 case OPCODE_RCP: 723 ptn_move_dest(b, dest, nir_frcp(b, ptn_channel(b, src[0], X))); 724 break; 725 726 case OPCODE_EX2: 727 ptn_move_dest(b, dest, nir_fexp2(b, ptn_channel(b, src[0], X))); 728 break; 729 730 case OPCODE_LG2: 731 ptn_move_dest(b, dest, nir_flog2(b, ptn_channel(b, src[0], X))); 732 break; 733 734 case OPCODE_POW: 735 ptn_move_dest(b, dest, nir_fpow(b, 736 ptn_channel(b, src[0], X), 737 ptn_channel(b, src[1], X))); 738 break; 739 740 case OPCODE_COS: 741 ptn_move_dest(b, dest, nir_fcos(b, ptn_channel(b, src[0], X))); 742 break; 743 744 case OPCODE_SIN: 745 ptn_move_dest(b, dest, nir_fsin(b, ptn_channel(b, src[0], X))); 746 break; 747 748 case OPCODE_ARL: 749 ptn_arl(b, dest, src); 750 break; 751 752 case OPCODE_EXP: 753 ptn_exp(b, dest, src); 754 break; 755 756 case OPCODE_LOG: 757 ptn_log(b, dest, src); 758 break; 759 760 case OPCODE_LRP: 761 ptn_lrp(b, dest, src); 762 break; 763 764 case OPCODE_MAD: 765 ptn_move_dest(b, dest, nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2])); 766 break; 767 768 case OPCODE_DST: 769 ptn_dst(b, dest, src); 770 break; 771 772 case OPCODE_LIT: 773 ptn_lit(b, dest, src); 774 break; 775 776 case OPCODE_XPD: 777 ptn_xpd(b, dest, src); 778 break; 779 780 case OPCODE_DP2: 781 ptn_dp2(b, dest, src); 782 break; 783 784 case OPCODE_DP3: 785 ptn_dp3(b, dest, src); 786 break; 787 788 case OPCODE_DP4: 789 ptn_dp4(b, dest, src); 790 break; 791 792 case OPCODE_DPH: 793 ptn_dph(b, dest, src); 794 break; 795 796 case OPCODE_KIL: 797 ptn_kil(b, src); 798 break; 799 800 case OPCODE_CMP: 801 ptn_cmp(b, dest, src); 802 break; 803 804 case OPCODE_SCS: 805 ptn_scs(b, dest, src); 806 break; 807 808 case OPCODE_SLT: 809 ptn_slt(b, dest, src); 810 break; 811 812 case OPCODE_SGE: 813 ptn_sge(b, dest, src); 814 break; 815 816 case OPCODE_TEX: 817 
case OPCODE_TXB: 818 case OPCODE_TXD: 819 case OPCODE_TXL: 820 case OPCODE_TXP: 821 ptn_tex(b, dest, src, prog_inst); 822 break; 823 824 case OPCODE_SWZ: 825 /* Extended swizzles were already handled in ptn_get_src(). */ 826 ptn_alu(b, nir_op_fmov, dest, src); 827 break; 828 829 case OPCODE_NOP: 830 break; 831 832 default: 833 if (op_trans[op] != 0) { 834 ptn_alu(b, op_trans[op], dest, src); 835 } else { 836 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op)); 837 abort(); 838 } 839 break; 840 } 841 842 if (prog_inst->Saturate) { 843 assert(prog_inst->Saturate); 844 assert(!dest.dest.is_ssa); 845 ptn_move_dest(b, dest, nir_fsat(b, ptn_src_for_dest(c, &dest))); 846 } 847 } 848 849 /** 850 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output 851 * variables at the end of the shader. 852 * 853 * We don't generate these incrementally as the PROGRAM_OUTPUT values are 854 * written, because there's no output load intrinsic, which means we couldn't 855 * handle writemasks. 856 */ 857 static void 858 ptn_add_output_stores(struct ptn_compile *c) 859 { 860 nir_builder *b = &c->build; 861 862 nir_foreach_variable(var, &b->shader->outputs) { 863 nir_intrinsic_instr *store = 864 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); 865 store->num_components = glsl_get_vector_elements(var->type); 866 nir_intrinsic_set_write_mask(store, (1 << store->num_components) - 1); 867 store->variables[0] = 868 nir_deref_var_create(store, c->output_vars[var->data.location]); 869 870 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 871 var->data.location == FRAG_RESULT_DEPTH) { 872 /* result.depth has this strange convention of being the .z component of 873 * a vec4 with undefined .xyw components. We resolve it to a scalar, to 874 * match GLSL's gl_FragDepth and the expectations of most backends. 
          */
         nir_alu_src alu_src = { NIR_SRC_INIT };
         alu_src.src = nir_src_for_reg(c->output_regs[FRAG_RESULT_DEPTH]);
         alu_src.swizzle[0] = SWIZZLE_Z;
         store->src[0] = nir_src_for_ssa(nir_fmov_alu(b, alu_src, 1));
      } else {
         store->src[0].reg.reg = c->output_regs[var->data.location];
      }
      nir_builder_instr_insert(b, &store->instr);
   }
}

/**
 * Creates the NIR variables and registers for inputs, outputs, temporaries,
 * and the address register ahead of instruction translation.  Sets c->error
 * on allocation failure.
 */
static void
setup_registers_and_variables(struct ptn_compile *c)
{
   nir_builder *b = &c->build;
   struct nir_shader *shader = b->shader;

   /* Create input variables. */
   const int num_inputs = util_last_bit64(c->prog->info.inputs_read);
   for (int i = 0; i < num_inputs; i++) {
      if (!(c->prog->info.inputs_read & BITFIELD64_BIT(i)))
         continue;

      nir_variable *var =
         nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(),
                             ralloc_asprintf(shader, "in_%d", i));
      var->data.location = i;
      var->data.index = 0;

      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         if (i == VARYING_SLOT_POS) {
            var->data.origin_upper_left = c->prog->OriginUpperLeft;
            var->data.pixel_center_integer = c->prog->PixelCenterInteger;
         } else if (i == VARYING_SLOT_FOGC) {
            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
             * input variable a float, and create a local containing the
             * full vec4 value.
             */
            var->type = glsl_float_type();

            nir_intrinsic_instr *load_x =
               nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
            load_x->num_components = 1;
            load_x->variables[0] = nir_deref_var_create(load_x, var);
            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, 32, NULL);
            nir_builder_instr_insert(b, &load_x->instr);

            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, nir_imm_float(b, 0.0),
                                         nir_imm_float(b, 0.0), nir_imm_float(b, 1.0));

            nir_variable *fullvar =
               nir_local_variable_create(b->impl, glsl_vec4_type(),
                                         "fogcoord_tmp");
            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(shader, nir_intrinsic_store_var);
            store->num_components = 4;
            nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW);
            store->variables[0] = nir_deref_var_create(store, fullvar);
            store->src[0] = nir_src_for_ssa(f001);
            nir_builder_instr_insert(b, &store->instr);

            /* We inserted the real input into the list so the driver has real
             * inputs, but we set c->input_vars[i] to the temporary so we use
             * the splatted value.
             */
            c->input_vars[i] = fullvar;
            continue;
         }
      }

      c->input_vars[i] = var;
   }

   /* Create output registers and variables. */
   int max_outputs = util_last_bit(c->prog->info.outputs_written);
   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);

   for (int i = 0; i < max_outputs; i++) {
      if (!(c->prog->info.outputs_written & BITFIELD64_BIT(i)))
         continue;

      /* Since we can't load from outputs in the IR, we make temporaries
       * for the outputs and emit stores to the real outputs at the end of
       * the shader.
       */
      nir_register *reg = nir_local_reg_create(b->impl);
      reg->num_components = 4;

      nir_variable *var = rzalloc(shader, nir_variable);
      /* result.depth is a scalar; every other output is a vec4. */
      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH)
         var->type = glsl_float_type();
      else
         var->type = glsl_vec4_type();
      var->data.mode = nir_var_shader_out;
      var->name = ralloc_asprintf(var, "out_%d", i);

      var->data.location = i;
      var->data.index = 0;

      c->output_regs[i] = reg;

      exec_list_push_tail(&shader->outputs, &var->node);
      c->output_vars[i] = var;
   }

   /* Create temporary registers. */
   c->temp_regs = rzalloc_array(c, nir_register *,
                                c->prog->arb.NumTemporaries);

   nir_register *reg;
   for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
      reg = nir_local_reg_create(b->impl);
      if (!reg) {
         c->error = true;
         return;
      }
      reg->num_components = 4;
      c->temp_regs[i] = reg;
   }

   /* Create the address register (for ARB_vertex_program).
    */
   reg = nir_local_reg_create(b->impl);
   if (!reg) {
      c->error = true;
      return;
   }
   reg->num_components = 1;
   c->addr_reg = reg;
}

/**
 * Translates the given Mesa IR program into a freshly-allocated NIR shader.
 *
 * \param prog     the Mesa IR program to translate
 * \param options  the backend's NIR compiler options
 * \return the new nir_shader (owned by the caller), or NULL on allocation
 *         or translation failure.
 */
struct nir_shader *
prog_to_nir(const struct gl_program *prog,
            const nir_shader_compiler_options *options)
{
   struct ptn_compile *c;
   struct nir_shader *s;
   gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);

   c = rzalloc(NULL, struct ptn_compile);
   if (!c)
      return NULL;
   c->prog = prog;

   nir_builder_init_simple_shader(&c->build, NULL, stage, options);

   /* Copy the shader_info from the gl_program */
   c->build.shader->info = prog->info;

   s = c->build.shader;

   if (prog->Parameters->NumParameters > 0) {
      /* Expose the whole parameter list as one read-only vec4[] uniform. */
      c->parameters = rzalloc(s, nir_variable);
      c->parameters->type =
         glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters);
      c->parameters->name = "parameters";
      c->parameters->data.read_only = true;
      c->parameters->data.mode = nir_var_uniform;
      exec_list_push_tail(&s->uniforms, &c->parameters->node);
   }

   setup_registers_and_variables(c);
   if (unlikely(c->error))
      goto fail;

   for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
      ptn_emit_instruction(c, &prog->arb.Instructions[i]);

      if (unlikely(c->error))
         break;
   }

   ptn_add_output_stores(c);

   s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
   s->info.num_textures = util_last_bit(prog->SamplersUsed);
   s->info.num_ubos = 0;
   s->info.num_abos = 0;
   s->info.num_ssbos = 0;
   s->info.num_images = 0;
   s->info.uses_texture_gather = false;
   s->info.clip_distance_array_size = 0;
   s->info.cull_distance_array_size = 0;
   s->info.separate_shader = false;

fail:
   /* On error, free the partially-built shader and report failure. */
   if (c->error) {
      ralloc_free(s);
      s = NULL;
   }
   ralloc_free(c);
   return s;
}