1 /* 2 * Copyright 2015 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 /* 25 * This lowering pass supports (as configured via nir_lower_tex_options) 26 * various texture related conversions: 27 * + texture projector lowering: converts the coordinate division for 28 * texture projection to be done in ALU instructions instead of 29 * asking the texture operation to do so. 30 * + lowering RECT: converts the un-normalized RECT texture coordinates 31 * to normalized coordinates with txs plus ALU instructions 32 * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes, 33 * inserts instructions to clamp specified coordinates to [0.0, 1.0]. 34 * Note that this automatically triggers texture projector lowering if 35 * needed, since clamping must happen after projector lowering. 36 */ 37 38 #include "nir.h" 39 #include "nir_builder.h" 40 41 static void 42 project_src(nir_builder *b, nir_tex_instr *tex) 43 { 44 /* Find the projector in the srcs list, if present. */ 45 int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector); 46 if (proj_index < 0) 47 return; 48 49 b->cursor = nir_before_instr(&tex->instr); 50 51 nir_ssa_def *inv_proj = 52 nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); 53 54 /* Walk through the sources projecting the arguments. */ 55 for (unsigned i = 0; i < tex->num_srcs; i++) { 56 switch (tex->src[i].src_type) { 57 case nir_tex_src_coord: 58 case nir_tex_src_comparator: 59 break; 60 default: 61 continue; 62 } 63 nir_ssa_def *unprojected = 64 nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); 65 nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); 66 67 /* Array indices don't get projected, so make an new vector with the 68 * coordinate's array index untouched. 69 */ 70 if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { 71 switch (tex->coord_components) { 72 case 4: 73 projected = nir_vec4(b, 74 nir_channel(b, projected, 0), 75 nir_channel(b, projected, 1), 76 nir_channel(b, projected, 2), 77 nir_channel(b, unprojected, 3)); 78 break; 79 case 3: 80 projected = nir_vec3(b, 81 nir_channel(b, projected, 0), 82 nir_channel(b, projected, 1), 83 nir_channel(b, unprojected, 2)); 84 break; 85 case 2: 86 projected = nir_vec2(b, 87 nir_channel(b, projected, 0), 88 nir_channel(b, unprojected, 1)); 89 break; 90 default: 91 unreachable("bad texture coord count for array"); 92 break; 93 } 94 } 95 96 nir_instr_rewrite_src(&tex->instr, 97 &tex->src[i].src, 98 nir_src_for_ssa(projected)); 99 } 100 101 nir_tex_instr_remove_src(tex, proj_index); 102 } 103 104 static bool 105 lower_offset(nir_builder *b, nir_tex_instr *tex) 106 { 107 int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); 108 if (offset_index < 0) 109 return false; 110 111 int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 112 assert(coord_index >= 0); 113 114 assert(tex->src[offset_index].src.is_ssa); 115 assert(tex->src[coord_index].src.is_ssa); 116 nir_ssa_def *offset = tex->src[offset_index].src.ssa; 117 nir_ssa_def *coord = tex->src[coord_index].src.ssa; 118 119 b->cursor = nir_before_instr(&tex->instr); 120 121 nir_ssa_def *offset_coord; 122 if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { 123 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT); 124 offset_coord = nir_fadd(b, coord, nir_i2f(b, offset)); 125 } else { 126 offset_coord = nir_iadd(b, coord, offset); 127 } 128 129 if (tex->is_array) { 130 /* The offset is not applied to the array index */ 131 if (tex->coord_components == 2) { 132 offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0), 133 nir_channel(b, coord, 1)); 134 } else if (tex->coord_components == 3) { 135 offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0), 136 nir_channel(b, offset_coord, 1), 137 nir_channel(b, coord, 2)); 138 } else { 139 unreachable("Invalid number of components"); 140 } 141 } 142 143 nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, 144 nir_src_for_ssa(offset_coord)); 145 146 nir_tex_instr_remove_src(tex, offset_index); 147 148 return true; 149 } 150 151 152 static nir_ssa_def * 153 get_texture_size(nir_builder *b, nir_tex_instr *tex) 154 { 155 b->cursor = nir_before_instr(&tex->instr); 156 157 nir_tex_instr *txs; 158 159 txs = nir_tex_instr_create(b->shader, 1); 160 txs->op = nir_texop_txs; 161 txs->sampler_dim = tex->sampler_dim; 162 txs->is_array = tex->is_array; 163 txs->is_shadow = tex->is_shadow; 164 txs->is_new_style_shadow = tex->is_new_style_shadow; 165 txs->texture_index = tex->texture_index; 166 txs->texture = nir_deref_var_clone(tex->texture, txs); 167 txs->sampler_index = tex->sampler_index; 168 txs->sampler = nir_deref_var_clone(tex->sampler, txs); 169 txs->dest_type = nir_type_int; 170 171 /* only single src, the lod: */ 172 txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); 173 txs->src[0].src_type = nir_tex_src_lod; 174 175 nir_ssa_dest_init(&txs->instr, &txs->dest, tex->coord_components, 32, NULL); 176 nir_builder_instr_insert(b, &txs->instr); 177 178 return nir_i2f(b, &txs->dest.ssa); 179 } 180 181 static void 182 lower_rect(nir_builder *b, nir_tex_instr *tex) 183 { 184 nir_ssa_def *txs = get_texture_size(b, tex); 185 nir_ssa_def *scale = nir_frcp(b, txs); 186 187 /* Walk through the sources normalizing the requested arguments. */ 188 for (unsigned i = 0; i < tex->num_srcs; i++) { 189 if (tex->src[i].src_type != nir_tex_src_coord) 190 continue; 191 192 nir_ssa_def *coords = 193 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); 194 nir_instr_rewrite_src(&tex->instr, 195 &tex->src[i].src, 196 nir_src_for_ssa(nir_fmul(b, coords, scale))); 197 } 198 199 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 200 } 201 202 static nir_ssa_def * 203 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane) 204 { 205 assert(tex->dest.is_ssa); 206 assert(nir_tex_instr_dest_size(tex) == 4); 207 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 208 assert(tex->op == nir_texop_tex); 209 assert(tex->coord_components == 2); 210 211 nir_tex_instr *plane_tex = nir_tex_instr_create(b->shader, 2); 212 nir_src_copy(&plane_tex->src[0].src, &tex->src[0].src, plane_tex); 213 plane_tex->src[0].src_type = nir_tex_src_coord; 214 plane_tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, plane)); 215 plane_tex->src[1].src_type = nir_tex_src_plane; 216 plane_tex->op = nir_texop_tex; 217 plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 218 plane_tex->dest_type = nir_type_float; 219 plane_tex->coord_components = 2; 220 221 plane_tex->texture_index = tex->texture_index; 222 plane_tex->texture = nir_deref_var_clone(tex->texture, plane_tex); 223 plane_tex->sampler_index = tex->sampler_index; 224 plane_tex->sampler = nir_deref_var_clone(tex->sampler, plane_tex); 225 226 nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL); 227 228 nir_builder_instr_insert(b, &plane_tex->instr); 229 230 return &plane_tex->dest.ssa; 231 } 232 233 static void 234 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, 235 nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v) 236 { 237 nir_const_value m[3] = { 238 { .f32 = { 1.0f, 0.0f, 1.59602678f, 0.0f } }, 239 { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } }, 240 { .f32 = { 1.0f, 2.01723214f, 0.0f, 0.0f } } 241 }; 242 243 nir_ssa_def *yuv = 244 nir_vec4(b, 245 nir_fmul(b, nir_imm_float(b, 1.16438356f), 246 nir_fadd(b, y, nir_imm_float(b, -0.0625f))), 247 nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0), 248 nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0), 249 nir_imm_float(b, 0.0)); 250 251 nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0])); 252 nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1])); 253 nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2])); 254 255 nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f)); 256 257 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result)); 258 } 259 260 static void 261 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex) 262 { 263 b->cursor = nir_after_instr(&tex->instr); 264 265 nir_ssa_def *y = sample_plane(b, tex, 0); 266 nir_ssa_def *uv = sample_plane(b, tex, 1); 267 268 convert_yuv_to_rgb(b, tex, 269 nir_channel(b, y, 0), 270 nir_channel(b, uv, 0), 271 nir_channel(b, uv, 1)); 272 } 273 274 static void 275 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex) 276 { 277 b->cursor = nir_after_instr(&tex->instr); 278 279 nir_ssa_def *y = sample_plane(b, tex, 0); 280 nir_ssa_def *u = sample_plane(b, tex, 1); 281 nir_ssa_def *v = sample_plane(b, tex, 2); 282 283 convert_yuv_to_rgb(b, tex, 284 nir_channel(b, y, 0), 285 nir_channel(b, u, 0), 286 nir_channel(b, v, 0)); 287 } 288 289 static void 290 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex) 291 { 292 b->cursor = nir_after_instr(&tex->instr); 293 294 nir_ssa_def *y = sample_plane(b, tex, 0); 295 nir_ssa_def *xuxv = sample_plane(b, tex, 1); 296 297 convert_yuv_to_rgb(b, tex, 298 nir_channel(b, y, 0), 299 nir_channel(b, xuxv, 1), 300 nir_channel(b, xuxv, 3)); 301 } 302 303 /* 304 * Emits a textureLod operation used to replace an existing 305 * textureGrad instruction. 306 */ 307 static void 308 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex) 309 { 310 /* We are going to emit a textureLod() with the same parameters except that 311 * we replace ddx/ddy with lod. 312 */ 313 int num_srcs = tex->num_srcs - 1; 314 nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs); 315 316 txl->op = nir_texop_txl; 317 txl->sampler_dim = tex->sampler_dim; 318 txl->texture_index = tex->texture_index; 319 txl->dest_type = tex->dest_type; 320 txl->is_array = tex->is_array; 321 txl->is_shadow = tex->is_shadow; 322 txl->is_new_style_shadow = tex->is_new_style_shadow; 323 txl->sampler_index = tex->sampler_index; 324 txl->texture = nir_deref_var_clone(tex->texture, txl); 325 txl->sampler = nir_deref_var_clone(tex->sampler, txl); 326 txl->coord_components = tex->coord_components; 327 328 nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL); 329 330 int src_num = 0; 331 for (int i = 0; i < tex->num_srcs; i++) { 332 if (tex->src[i].src_type == nir_tex_src_ddx || 333 tex->src[i].src_type == nir_tex_src_ddy) 334 continue; 335 nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl); 336 txl->src[src_num].src_type = tex->src[i].src_type; 337 src_num++; 338 } 339 340 txl->src[src_num].src = nir_src_for_ssa(lod); 341 txl->src[src_num].src_type = nir_tex_src_lod; 342 src_num++; 343 344 assert(src_num == num_srcs); 345 346 nir_ssa_dest_init(&txl->instr, &txl->dest, 347 tex->dest.ssa.num_components, 32, NULL); 348 nir_builder_instr_insert(b, &txl->instr); 349 350 nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa)); 351 352 nir_instr_remove(&tex->instr); 353 } 354 355 static void 356 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) 357 { 358 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); 359 assert(tex->op == nir_texop_txd); 360 assert(tex->dest.is_ssa); 361 362 /* Use textureSize() to get the width and height of LOD 0 */ 363 nir_ssa_def *size = get_texture_size(b, tex); 364 365 /* Cubemap texture lookups first generate a texture coordinate normalized 366 * to [-1, 1] on the appropiate face. The appropiate face is determined 367 * by which component has largest magnitude and its sign. The texture 368 * coordinate is the quotient of the remaining texture coordinates against 369 * that absolute value of the component of largest magnitude. This 370 * division requires that the computing of the derivative of the texel 371 * coordinate must use the quotient rule. The high level GLSL code is as 372 * follows: 373 * 374 * Step 1: selection 375 * 376 * vec3 abs_p, Q, dQdx, dQdy; 377 * abs_p = abs(ir->coordinate); 378 * if (abs_p.x >= max(abs_p.y, abs_p.z)) { 379 * Q = ir->coordinate.yzx; 380 * dQdx = ir->lod_info.grad.dPdx.yzx; 381 * dQdy = ir->lod_info.grad.dPdy.yzx; 382 * } 383 * if (abs_p.y >= max(abs_p.x, abs_p.z)) { 384 * Q = ir->coordinate.xzy; 385 * dQdx = ir->lod_info.grad.dPdx.xzy; 386 * dQdy = ir->lod_info.grad.dPdy.xzy; 387 * } 388 * if (abs_p.z >= max(abs_p.x, abs_p.y)) { 389 * Q = ir->coordinate; 390 * dQdx = ir->lod_info.grad.dPdx; 391 * dQdy = ir->lod_info.grad.dPdy; 392 * } 393 * 394 * Step 2: use quotient rule to compute derivative. The normalized to 395 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are 396 * only concerned with the magnitudes of the derivatives whose values are 397 * not affected by the sign. We drop the sign from the computation. 398 * 399 * vec2 dx, dy; 400 * float recip; 401 * 402 * recip = 1.0 / Q.z; 403 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); 404 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); 405 * 406 * Step 3: compute LOD. At this point we have the derivatives of the 407 * texture coordinates normalized to [-1,1]. We take the LOD to be 408 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) 409 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) 410 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) 411 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) 412 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) 413 * where L is the dimension of the cubemap. The code is: 414 * 415 * float M, result; 416 * M = max(dot(dx, dx), dot(dy, dy)); 417 * L = textureSize(sampler, 0).x; 418 * result = -1.0 + 0.5 * log2(L * L * M); 419 */ 420 421 /* coordinate */ 422 nir_ssa_def *p = 423 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa; 424 425 /* unmodified dPdx, dPdy values */ 426 nir_ssa_def *dPdx = 427 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 428 nir_ssa_def *dPdy = 429 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 430 431 nir_ssa_def *abs_p = nir_fabs(b, p); 432 nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0); 433 nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1); 434 nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2); 435 436 /* 1. compute selector */ 437 nir_ssa_def *Q, *dQdx, *dQdy; 438 439 nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y)); 440 nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z)); 441 442 unsigned yzx[4] = { 1, 2, 0, 0 }; 443 unsigned xzy[4] = { 0, 2, 1, 0 }; 444 445 Q = nir_bcsel(b, cond_z, 446 p, 447 nir_bcsel(b, cond_y, 448 nir_swizzle(b, p, xzy, 3, false), 449 nir_swizzle(b, p, yzx, 3, false))); 450 451 dQdx = nir_bcsel(b, cond_z, 452 dPdx, 453 nir_bcsel(b, cond_y, 454 nir_swizzle(b, dPdx, xzy, 3, false), 455 nir_swizzle(b, dPdx, yzx, 3, false))); 456 457 dQdy = nir_bcsel(b, cond_z, 458 dPdy, 459 nir_bcsel(b, cond_y, 460 nir_swizzle(b, dPdy, xzy, 3, false), 461 nir_swizzle(b, dPdy, yzx, 3, false))); 462 463 /* 2. quotient rule */ 464 465 /* tmp = Q.xy * recip; 466 * dx = recip * ( dQdx.xy - (tmp * dQdx.z) ); 467 * dy = recip * ( dQdy.xy - (tmp * dQdy.z) ); 468 */ 469 nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2)); 470 471 unsigned xy[4] = { 0, 1, 0, 0 }; 472 nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false); 473 nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z); 474 475 nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false); 476 nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2); 477 nir_ssa_def *dx = 478 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z))); 479 480 nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false); 481 nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2); 482 nir_ssa_def *dy = 483 nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z))); 484 485 /* M = max(dot(dx, dx), dot(dy, dy)); */ 486 nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy)); 487 488 /* size has textureSize() of LOD 0 */ 489 nir_ssa_def *L = nir_channel(b, size, 0); 490 491 /* lod = -1.0 + 0.5 * log2(L * L * M); */ 492 nir_ssa_def *lod = 493 nir_fadd(b, 494 nir_imm_float(b, -1.0f), 495 nir_fmul(b, 496 nir_imm_float(b, 0.5f), 497 nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M))))); 498 499 /* 3. Replace the gradient instruction with an equivalent lod instruction */ 500 replace_gradient_with_lod(b, lod, tex); 501 } 502 503 static void 504 lower_gradient_shadow(nir_builder *b, nir_tex_instr *tex) 505 { 506 assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE); 507 assert(tex->is_shadow); 508 assert(tex->op == nir_texop_txd); 509 assert(tex->dest.is_ssa); 510 511 /* Use textureSize() to get the width and height of LOD 0 */ 512 unsigned component_mask; 513 switch (tex->sampler_dim) { 514 case GLSL_SAMPLER_DIM_3D: 515 component_mask = 7; 516 break; 517 case GLSL_SAMPLER_DIM_1D: 518 component_mask = 1; 519 break; 520 default: 521 component_mask = 3; 522 break; 523 } 524 525 nir_ssa_def *size = 526 nir_channels(b, get_texture_size(b, tex), component_mask); 527 528 /* Scale the gradients by width and height. Effectively, the incoming 529 * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the 530 * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y). 531 */ 532 nir_ssa_def *ddx = 533 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 534 nir_ssa_def *ddy = 535 tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 536 537 nir_ssa_def *dPdx = nir_fmul(b, ddx, size); 538 nir_ssa_def *dPdy = nir_fmul(b, ddy, size); 539 540 nir_ssa_def *rho; 541 if (dPdx->num_components == 1) { 542 rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy)); 543 } else { 544 rho = nir_fmax(b, 545 nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)), 546 nir_fsqrt(b, nir_fdot(b, dPdy, dPdy))); 547 } 548 549 /* lod = log2(rho). We're ignoring GL state biases for now. */ 550 nir_ssa_def *lod = nir_flog2(b, rho); 551 552 /* Replace the gradient instruction with an equivalent lod instruction */ 553 replace_gradient_with_lod(b, lod, tex); 554 } 555 556 static void 557 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) 558 { 559 b->cursor = nir_before_instr(&tex->instr); 560 561 /* Walk through the sources saturating the requested arguments. */ 562 for (unsigned i = 0; i < tex->num_srcs; i++) { 563 if (tex->src[i].src_type != nir_tex_src_coord) 564 continue; 565 566 nir_ssa_def *src = 567 nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); 568 569 /* split src into components: */ 570 nir_ssa_def *comp[4]; 571 572 assume(tex->coord_components >= 1); 573 574 for (unsigned j = 0; j < tex->coord_components; j++) 575 comp[j] = nir_channel(b, src, j); 576 577 /* clamp requested components, array index does not get clamped: */ 578 unsigned ncomp = tex->coord_components; 579 if (tex->is_array) 580 ncomp--; 581 582 for (unsigned j = 0; j < ncomp; j++) { 583 if ((1 << j) & sat_mask) { 584 if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 585 /* non-normalized texture coords, so clamp to texture 586 * size rather than [0.0, 1.0] 587 */ 588 nir_ssa_def *txs = get_texture_size(b, tex); 589 comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); 590 comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); 591 } else { 592 comp[j] = nir_fsat(b, comp[j]); 593 } 594 } 595 } 596 597 /* and move the result back into a single vecN: */ 598 src = nir_vec(b, comp, tex->coord_components); 599 600 nir_instr_rewrite_src(&tex->instr, 601 &tex->src[i].src, 602 nir_src_for_ssa(src)); 603 } 604 } 605 606 static nir_ssa_def * 607 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) 608 { 609 nir_const_value v; 610 611 memset(&v, 0, sizeof(v)); 612 613 if (swizzle_val == 4) { 614 v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0; 615 } else { 616 assert(swizzle_val == 5); 617 if (type == nir_type_float) 618 v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0; 619 else 620 v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1; 621 } 622 623 return nir_build_imm(b, 4, 32, v); 624 } 625 626 static void 627 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) 628 { 629 assert(tex->dest.is_ssa); 630 631 b->cursor = nir_after_instr(&tex->instr); 632 633 nir_ssa_def *swizzled; 634 if (tex->op == nir_texop_tg4) { 635 if (swizzle[tex->component] < 4) { 636 /* This one's easy */ 637 tex->component = swizzle[tex->component]; 638 return; 639 } else { 640 swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); 641 } 642 } else { 643 assert(nir_tex_instr_dest_size(tex) == 4); 644 if (swizzle[0] < 4 && swizzle[1] < 4 && 645 swizzle[2] < 4 && swizzle[3] < 4) { 646 unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; 647 /* We have no 0's or 1's, just emit a swizzling MOV */ 648 swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); 649 } else { 650 nir_ssa_def *srcs[4]; 651 for (unsigned i = 0; i < 4; i++) { 652 if (swizzle[i] < 4) { 653 srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); 654 } else { 655 srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); 656 } 657 } 658 swizzled = nir_vec(b, srcs, 4); 659 } 660 } 661 662 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), 663 swizzled->parent_instr); 664 } 665 666 static void 667 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex) 668 { 669 assert(tex->dest.is_ssa); 670 assert(nir_tex_instr_dest_size(tex) == 4); 671 assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 672 673 b->cursor = nir_after_instr(&tex->instr); 674 675 static const unsigned swiz[4] = {0, 1, 2, 0}; 676 nir_ssa_def *comp = nir_swizzle(b, &tex->dest.ssa, swiz, 3, true); 677 678 /* Formula is: 679 * (comp <= 0.04045) ? 680 * (comp / 12.92) : 681 * pow((comp + 0.055) / 1.055, 2.4) 682 */ 683 nir_ssa_def *low = nir_fmul(b, comp, nir_imm_float(b, 1.0 / 12.92)); 684 nir_ssa_def *high = nir_fpow(b, 685 nir_fmul(b, 686 nir_fadd(b, 687 comp, 688 nir_imm_float(b, 0.055)), 689 nir_imm_float(b, 1.0 / 1.055)), 690 nir_imm_float(b, 2.4)); 691 nir_ssa_def *cond = nir_fge(b, nir_imm_float(b, 0.04045), comp); 692 nir_ssa_def *rgb = nir_bcsel(b, cond, low, high); 693 694 /* alpha is untouched: */ 695 nir_ssa_def *result = nir_vec4(b, 696 nir_channel(b, rgb, 0), 697 nir_channel(b, rgb, 1), 698 nir_channel(b, rgb, 2), 699 nir_channel(b, &tex->dest.ssa, 3)); 700 701 nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result), 702 result->parent_instr); 703 } 704 705 static bool 706 nir_lower_tex_block(nir_block *block, nir_builder *b, 707 const nir_lower_tex_options *options) 708 { 709 bool progress = false; 710 711 nir_foreach_instr_safe(instr, block) { 712 if (instr->type != nir_instr_type_tex) 713 continue; 714 715 nir_tex_instr *tex = nir_instr_as_tex(instr); 716 bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim)); 717 718 /* mask of src coords to saturate (clamp): */ 719 unsigned sat_mask = 0; 720 721 if ((1 << tex->sampler_index) & options->saturate_r) 722 sat_mask |= (1 << 2); /* .z */ 723 if ((1 << tex->sampler_index) & options->saturate_t) 724 sat_mask |= (1 << 1); /* .y */ 725 if ((1 << tex->sampler_index) & options->saturate_s) 726 sat_mask |= (1 << 0); /* .x */ 727 728 /* If we are clamping any coords, we must lower projector first 729 * as clamping happens *after* projection: 730 */ 731 if (lower_txp || sat_mask) { 732 project_src(b, tex); 733 progress = true; 734 } 735 736 if ((tex->op == nir_texop_txf && options->lower_txf_offset) || 737 (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT && 738 options->lower_rect_offset)) { 739 progress = lower_offset(b, tex) || progress; 740 } 741 742 if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) { 743 lower_rect(b, tex); 744 progress = true; 745 } 746 747 if ((1 << tex->texture_index) & options->lower_y_uv_external) { 748 lower_y_uv_external(b, tex); 749 progress = true; 750 } 751 752 if ((1 << tex->texture_index) & options->lower_y_u_v_external) { 753 lower_y_u_v_external(b, tex); 754 progress = true; 755 } 756 757 if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) { 758 lower_yx_xuxv_external(b, tex); 759 progress = true; 760 } 761 762 763 if (sat_mask) { 764 saturate_src(b, tex, sat_mask); 765 progress = true; 766 } 767 768 if (((1 << tex->texture_index) & options->swizzle_result) && 769 !nir_tex_instr_is_query(tex) && 770 !(tex->is_shadow && tex->is_new_style_shadow)) { 771 swizzle_result(b, tex, options->swizzles[tex->texture_index]); 772 progress = true; 773 } 774 775 /* should be after swizzle so we know which channels are rgb: */ 776 if (((1 << tex->texture_index) & options->lower_srgb) && 777 !nir_tex_instr_is_query(tex) && !tex->is_shadow) { 778 linearize_srgb_result(b, tex); 779 progress = true; 780 } 781 782 if (tex->op == nir_texop_txd && 783 tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && 784 (options->lower_txd_cube_map || 785 (tex->is_shadow && options->lower_txd_shadow))) { 786 lower_gradient_cube_map(b, tex); 787 progress = true; 788 continue; 789 } 790 791 if (tex->op == nir_texop_txd && options->lower_txd_shadow && 792 tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) { 793 lower_gradient_shadow(b, tex); 794 progress = true; 795 continue; 796 } 797 } 798 799 return progress; 800 } 801 802 static bool 803 nir_lower_tex_impl(nir_function_impl *impl, 804 const nir_lower_tex_options *options) 805 { 806 bool progress = false; 807 nir_builder builder; 808 nir_builder_init(&builder, impl); 809 810 nir_foreach_block(block, impl) { 811 progress |= nir_lower_tex_block(block, &builder, options); 812 } 813 814 nir_metadata_preserve(impl, nir_metadata_block_index | 815 nir_metadata_dominance); 816 return progress; 817 } 818 819 bool 820 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) 821 { 822 bool progress = false; 823 824 nir_foreach_function(function, shader) { 825 if (function->impl) 826 progress |= nir_lower_tex_impl(function->impl, options); 827 } 828 829 return progress; 830 } 831