/*
 * Copyright 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "vc4_qir.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_format.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the VC4 architecture.
 *
 * Currently, it splits VS inputs and uniforms into scalars, drops any
 * non-position outputs in coordinate shaders, and fixes up the addressing on
 * indirect uniform loads.  FS input and VS output scalarization is handled by
 * nir_lower_io_to_scalar().
 */
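
/*
 * As a rough illustration of the lowering (see vc4_nir_lower_uniform()
 * below): a 4-component load_uniform with base N and an indirect offset
 * "src" becomes four single-component loads with byte bases
 * 16 * N + 0/4/8/12 and the offset rescaled to bytes (src << 4), and the
 * results are stitched back together with nir_vec() so that later ALU
 * scalarization can split the uses again.
 */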

static void
replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
                           nir_ssa_def **comps)
{
        /* Batch things back together into a vector.  This will get split by
         * the later ALU scalarization pass.
         */
        nir_ssa_def *vec = nir_vec(b, comps, intr->num_components);

        /* Replace the old intrinsic with a reference to our reconstructed
         * vector.
         */
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(vec));
        nir_instr_remove(&intr->instr);
}

static nir_ssa_def *
vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
        return nir_ubitfield_extract(b,
                                     src,
                                     nir_imm_int(b, 8 * chan),
                                     nir_imm_int(b, 8));
}

/** Returns the 16 bit field as a sign-extended 32-bit value. */
static nir_ssa_def *
vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
        return nir_ibitfield_extract(b,
                                     src,
                                     nir_imm_int(b, 16 * chan),
                                     nir_imm_int(b, 16));
}

/** Returns the 16 bit field as an unsigned 32 bit value. */
static nir_ssa_def *
vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
        if (chan == 0) {
                return nir_iand(b, src, nir_imm_int(b, 0xffff));
        } else {
                return nir_ushr(b, src, nir_imm_int(b, 16));
        }
}

static nir_ssa_def *
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
{
        return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
}

static nir_ssa_def *
vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
                              nir_builder *b,
                              nir_ssa_def **vpm_reads,
                              uint8_t swiz,
                              const struct util_format_description *desc)
{
        const struct util_format_channel_description *chan =
                &desc->channel[swiz];
        nir_ssa_def *temp;

        if (swiz > PIPE_SWIZZLE_W) {
                return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
        } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
                return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
        } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                if (chan->normalized) {
                        return nir_fmul(b,
                                        nir_i2f(b, vpm_reads[swiz]),
                                        nir_imm_float(b,
                                                      1.0 / 0x7fffffff));
                } else {
                        return nir_i2f(b, vpm_reads[swiz]);
                }
        } else if (chan->size == 8 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                nir_ssa_def *vpm = vpm_reads[0];
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
                        if (chan->normalized) {
                                return nir_fsub(b, nir_fmul(b,
                                                            vc4_nir_unpack_8f(b, temp, swiz),
                                                            nir_imm_float(b, 2.0)),
                                                nir_imm_float(b, 1.0));
                        } else {
                                return nir_fadd(b,
                                                nir_i2f(b,
                                                        vc4_nir_unpack_8i(b, temp,
                                                                          swiz)),
                                                nir_imm_float(b, -128.0));
                        }
                } else {
                        if (chan->normalized) {
                                return vc4_nir_unpack_8f(b, vpm, swiz);
                        } else {
                                return nir_i2f(b, vc4_nir_unpack_8i(b, vpm, swiz));
                        }
                }
        } else if (chan->size == 16 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                nir_ssa_def *vpm = vpm_reads[swiz / 2];

                /* Note that UNPACK_16F eats a half float, not ints, so we use
                 * UNPACK_16_I for all of these.
                 */
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        temp = nir_i2f(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
                        if (chan->normalized) {
                                return nir_fmul(b, temp,
                                                nir_imm_float(b, 1 / 32768.0f));
                        } else {
                                return temp;
                        }
                } else {
                        temp = nir_i2f(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
                        if (chan->normalized) {
                                return nir_fmul(b, temp,
                                                nir_imm_float(b, 1 / 65535.0));
                        } else {
                                return temp;
                        }
                }
        } else {
                return NULL;
        }
}

static void
vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
                          nir_intrinsic_instr *intr)
{
        b->cursor = nir_before_instr(&intr->instr);

        int attr = nir_intrinsic_base(intr);
        enum pipe_format format = c->vs_key->attr_formats[attr];
        uint32_t attr_size = util_format_get_blocksize(format);

        /* We only accept direct inputs, and TGSI only ever gives them to us
         * with an offset value of 0.
         */
        assert(nir_src_as_const_value(intr->src[0]) &&
               nir_src_as_const_value(intr->src[0])->u32[0] == 0);

        /* Generate dword loads for the VPM values (since these intrinsics may
         * be reordered, the actual reads will be generated at the top of the
         * shader by ntq_setup_inputs()).
         */
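        /* For example, a PIPE_FORMAT_R8G8B8A8_UNORM attribute is one dword,
         * so only vpm_reads[0] is populated here and the four 8-bit channels
         * are unpacked from it in the loop below.
         */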
        nir_ssa_def *vpm_reads[4];
        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s,
                                                   nir_intrinsic_load_input);
                intr_comp->num_components = 1;
                nir_intrinsic_set_base(intr_comp, nir_intrinsic_base(intr));
                nir_intrinsic_set_component(intr_comp, i);
                intr_comp->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);
                nir_builder_instr_insert(b, &intr_comp->instr);

                vpm_reads[i] = &intr_comp->dest.ssa;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        nir_ssa_def *dests[4];
        for (int i = 0; i < intr->num_components; i++) {
                uint8_t swiz = desc->swizzle[i];
                dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
                                                         desc);

                if (!dests[i]) {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        dests[i] = nir_imm_float(b, 0.0);
                }
        }

        replace_intrinsic_with_vec(b, intr, dests);
}

static bool
is_point_sprite(struct vc4_compile *c, nir_variable *var)
{
        if (var->data.location < VARYING_SLOT_VAR0 ||
            var->data.location > VARYING_SLOT_VAR31)
                return false;

        return (c->fs_key->point_sprite_mask &
                (1 << (var->data.location - VARYING_SLOT_VAR0)));
}

static void
vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
                       nir_intrinsic_instr *intr)
{
        b->cursor = nir_after_instr(&intr->instr);

        if (nir_intrinsic_base(intr) >= VC4_NIR_TLB_COLOR_READ_INPUT &&
            nir_intrinsic_base(intr) < (VC4_NIR_TLB_COLOR_READ_INPUT +
                                        VC4_MAX_SAMPLES)) {
                /* This doesn't need any lowering. */
                return;
        }

        nir_variable *input_var = NULL;
        nir_foreach_variable(var, &c->s->inputs) {
                if (var->data.driver_location == nir_intrinsic_base(intr)) {
                        input_var = var;
                        break;
                }
        }
        assert(input_var);

        int comp = nir_intrinsic_component(intr);

        /* Lower away point coordinates, and fix up PNTC. */
        if (is_point_sprite(c, input_var) ||
            input_var->data.location == VARYING_SLOT_PNTC) {
                assert(intr->num_components == 1);

                nir_ssa_def *result = &intr->dest.ssa;

                switch (comp) {
                case 0:
                case 1:
                        /* If we're not rendering points, we need to set a
                         * defined value for the input that would come from
                         * PNTC.
                         */
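                        /* When points are being rendered, the loaded value is
                         * left alone: for a point sprite varying or PNTC it
                         * already carries the point coordinate.
                         */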
                        if (!c->fs_key->is_points)
                                result = nir_imm_float(b, 0.0);
                        break;
                case 2:
                        result = nir_imm_float(b, 0.0);
                        break;
                case 3:
                        result = nir_imm_float(b, 1.0);
                        break;
                }

                if (c->fs_key->point_coord_upper_left && comp == 1)
                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);

                if (result != &intr->dest.ssa) {
                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                                       nir_src_for_ssa(result),
                                                       result->parent_instr);
                }
        }
}

static void
vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
                     nir_intrinsic_instr *intr)
{
        nir_variable *output_var = NULL;
        nir_foreach_variable(var, &c->s->outputs) {
                if (var->data.driver_location == nir_intrinsic_base(intr)) {
                        output_var = var;
                        break;
                }
        }
        assert(output_var);

        if (c->stage == QSTAGE_COORD &&
            output_var->data.location != VARYING_SLOT_POS &&
            output_var->data.location != VARYING_SLOT_PSIZ) {
                nir_instr_remove(&intr->instr);
                return;
        }
}

static void
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* Generate scalar loads equivalent to the original vector. */
        nir_ssa_def *dests[4];
        for (unsigned i = 0; i < intr->num_components; i++) {
                nir_intrinsic_instr *intr_comp =
                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
                intr_comp->num_components = 1;
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1, 32, NULL);

                /* Convert the uniform offset to bytes.  If it happens
                 * to be a constant, constant-folding will clean up
                 * the shift for us.
                 */
                nir_intrinsic_set_base(intr_comp,
                                       nir_intrinsic_base(intr) * 16 +
                                       i * 4);

                intr_comp->src[0] =
                        nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                 nir_imm_int(b, 4)));

                dests[i] = &intr_comp->dest.ssa;

                nir_builder_instr_insert(b, &intr_comp->instr);
        }

        replace_intrinsic_with_vec(b, intr, dests);
}

static void
vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
                       struct nir_instr *instr)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                if (c->stage == QSTAGE_FRAG)
                        vc4_nir_lower_fs_input(c, b, intr);
                else
                        vc4_nir_lower_vertex_attr(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                vc4_nir_lower_output(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
                vc4_nir_lower_uniform(c, b, intr);
                break;

        case nir_intrinsic_load_user_clip_plane:
        default:
                break;
        }
}

static bool
vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
{
        nir_builder b;
        nir_builder_init(&b, impl);

        nir_foreach_block(block, impl) {
                nir_foreach_instr_safe(instr, block)
                        vc4_nir_lower_io_instr(c, &b, instr);
        }

        nir_metadata_preserve(impl, nir_metadata_block_index |
                              nir_metadata_dominance);

        return true;
}

void
vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c)
{
        nir_foreach_function(function, s) {
                if (function->impl)
                        vc4_nir_lower_io_impl(c, function->impl);
        }
}