Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright  2015 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
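/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + texel offset lowering: folds the offset source into the coordinate
 *     for txf and/or RECT lookups.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + external YUV lowering: samples the individual planes of a Y/UV,
 *     Y/U/V or YX/XUXV texture and converts the result to RGB in ALU
 *     instructions.
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 *   + result swizzling and sRGB -> linear conversion of the sampled value.
 *   + txd lowering: replaces explicit-gradient lookups on cube maps and/or
 *     shadow samplers with an explicit-LOD lookup.
 */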
     37 
     38 #include "nir.h"
     39 #include "nir_builder.h"
     40 
     41 static void
     42 project_src(nir_builder *b, nir_tex_instr *tex)
     43 {
     44    /* Find the projector in the srcs list, if present. */
     45    int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
     46    if (proj_index < 0)
     47       return;
     48 
     49    b->cursor = nir_before_instr(&tex->instr);
     50 
     51    nir_ssa_def *inv_proj =
     52       nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
     53 
     54    /* Walk through the sources projecting the arguments. */
     55    for (unsigned i = 0; i < tex->num_srcs; i++) {
     56       switch (tex->src[i].src_type) {
     57       case nir_tex_src_coord:
     58       case nir_tex_src_comparator:
     59          break;
     60       default:
     61          continue;
     62       }
     63       nir_ssa_def *unprojected =
     64          nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
     65       nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
     66 
     67       /* Array indices don't get projected, so make an new vector with the
     68        * coordinate's array index untouched.
     69        */
     70       if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
     71          switch (tex->coord_components) {
     72          case 4:
     73             projected = nir_vec4(b,
     74                                  nir_channel(b, projected, 0),
     75                                  nir_channel(b, projected, 1),
     76                                  nir_channel(b, projected, 2),
     77                                  nir_channel(b, unprojected, 3));
     78             break;
     79          case 3:
     80             projected = nir_vec3(b,
     81                                  nir_channel(b, projected, 0),
     82                                  nir_channel(b, projected, 1),
     83                                  nir_channel(b, unprojected, 2));
     84             break;
     85          case 2:
     86             projected = nir_vec2(b,
     87                                  nir_channel(b, projected, 0),
     88                                  nir_channel(b, unprojected, 1));
     89             break;
     90          default:
     91             unreachable("bad texture coord count for array");
     92             break;
     93          }
     94       }
     95 
     96       nir_instr_rewrite_src(&tex->instr,
     97                             &tex->src[i].src,
     98                             nir_src_for_ssa(projected));
     99    }
    100 
    101    nir_tex_instr_remove_src(tex, proj_index);
    102 }
    103 
    104 static bool
    105 lower_offset(nir_builder *b, nir_tex_instr *tex)
    106 {
    107    int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
    108    if (offset_index < 0)
    109       return false;
    110 
    111    int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
    112    assert(coord_index >= 0);
    113 
    114    assert(tex->src[offset_index].src.is_ssa);
    115    assert(tex->src[coord_index].src.is_ssa);
    116    nir_ssa_def *offset = tex->src[offset_index].src.ssa;
    117    nir_ssa_def *coord = tex->src[coord_index].src.ssa;
    118 
    119    b->cursor = nir_before_instr(&tex->instr);
    120 
    121    nir_ssa_def *offset_coord;
    122    if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
    123       assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
    124       offset_coord = nir_fadd(b, coord, nir_i2f(b, offset));
    125    } else {
    126       offset_coord = nir_iadd(b, coord, offset);
    127    }
    128 
    129    if (tex->is_array) {
    130       /* The offset is not applied to the array index */
    131       if (tex->coord_components == 2) {
    132          offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
    133                                     nir_channel(b, coord, 1));
    134       } else if (tex->coord_components == 3) {
    135          offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
    136                                     nir_channel(b, offset_coord, 1),
    137                                     nir_channel(b, coord, 2));
    138       } else {
    139          unreachable("Invalid number of components");
    140       }
    141    }
    142 
    143    nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
    144                          nir_src_for_ssa(offset_coord));
    145 
    146    nir_tex_instr_remove_src(tex, offset_index);
    147 
    148    return true;
    149 }
    150 
    151 
    152 static nir_ssa_def *
    153 get_texture_size(nir_builder *b, nir_tex_instr *tex)
    154 {
    155    b->cursor = nir_before_instr(&tex->instr);
    156 
    157    nir_tex_instr *txs;
    158 
    159    txs = nir_tex_instr_create(b->shader, 1);
    160    txs->op = nir_texop_txs;
    161    txs->sampler_dim = tex->sampler_dim;
    162    txs->is_array = tex->is_array;
    163    txs->is_shadow = tex->is_shadow;
    164    txs->is_new_style_shadow = tex->is_new_style_shadow;
    165    txs->texture_index = tex->texture_index;
    166    txs->texture = nir_deref_var_clone(tex->texture, txs);
    167    txs->sampler_index = tex->sampler_index;
    168    txs->sampler = nir_deref_var_clone(tex->sampler, txs);
    169    txs->dest_type = nir_type_int;
    170 
    171    /* only single src, the lod: */
    172    txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
    173    txs->src[0].src_type = nir_tex_src_lod;
    174 
    175    nir_ssa_dest_init(&txs->instr, &txs->dest, tex->coord_components, 32, NULL);
    176    nir_builder_instr_insert(b, &txs->instr);
    177 
    178    return nir_i2f(b, &txs->dest.ssa);
    179 }
    180 
    181 static void
    182 lower_rect(nir_builder *b, nir_tex_instr *tex)
    183 {
    184    nir_ssa_def *txs = get_texture_size(b, tex);
    185    nir_ssa_def *scale = nir_frcp(b, txs);
    186 
    187    /* Walk through the sources normalizing the requested arguments. */
    188    for (unsigned i = 0; i < tex->num_srcs; i++) {
    189       if (tex->src[i].src_type != nir_tex_src_coord)
    190          continue;
    191 
    192       nir_ssa_def *coords =
    193          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
    194       nir_instr_rewrite_src(&tex->instr,
    195                             &tex->src[i].src,
    196                             nir_src_for_ssa(nir_fmul(b, coords, scale)));
    197    }
    198 
    199    tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
    200 }
    201 
    202 static nir_ssa_def *
    203 sample_plane(nir_builder *b, nir_tex_instr *tex, int plane)
    204 {
    205    assert(tex->dest.is_ssa);
    206    assert(nir_tex_instr_dest_size(tex) == 4);
    207    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
    208    assert(tex->op == nir_texop_tex);
    209    assert(tex->coord_components == 2);
    210 
    211    nir_tex_instr *plane_tex = nir_tex_instr_create(b->shader, 2);
    212    nir_src_copy(&plane_tex->src[0].src, &tex->src[0].src, plane_tex);
    213    plane_tex->src[0].src_type = nir_tex_src_coord;
    214    plane_tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, plane));
    215    plane_tex->src[1].src_type = nir_tex_src_plane;
    216    plane_tex->op = nir_texop_tex;
    217    plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
    218    plane_tex->dest_type = nir_type_float;
    219    plane_tex->coord_components = 2;
    220 
    221    plane_tex->texture_index = tex->texture_index;
    222    plane_tex->texture = nir_deref_var_clone(tex->texture, plane_tex);
    223    plane_tex->sampler_index = tex->sampler_index;
    224    plane_tex->sampler = nir_deref_var_clone(tex->sampler, plane_tex);
    225 
    226    nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 32, NULL);
    227 
    228    nir_builder_instr_insert(b, &plane_tex->instr);
    229 
    230    return &plane_tex->dest.ssa;
    231 }
    232 
    233 static void
    234 convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
    235                    nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v)
    236 {
    237    nir_const_value m[3] = {
    238       { .f32 = { 1.0f,  0.0f,         1.59602678f, 0.0f } },
    239       { .f32 = { 1.0f, -0.39176229f, -0.81296764f, 0.0f } },
    240       { .f32 = { 1.0f,  2.01723214f,  0.0f,        0.0f } }
    241    };
    242 
    243    nir_ssa_def *yuv =
    244       nir_vec4(b,
    245                nir_fmul(b, nir_imm_float(b, 1.16438356f),
    246                         nir_fadd(b, y, nir_imm_float(b, -0.0625f))),
    247                nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0),
    248                nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0),
    249                nir_imm_float(b, 0.0));
    250 
    251    nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
    252    nir_ssa_def *green = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[1]));
    253    nir_ssa_def *blue = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[2]));
    254 
    255    nir_ssa_def *result = nir_vec4(b, red, green, blue, nir_imm_float(b, 1.0f));
    256 
    257    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(result));
    258 }
    259 
    260 static void
    261 lower_y_uv_external(nir_builder *b, nir_tex_instr *tex)
    262 {
    263    b->cursor = nir_after_instr(&tex->instr);
    264 
    265    nir_ssa_def *y = sample_plane(b, tex, 0);
    266    nir_ssa_def *uv = sample_plane(b, tex, 1);
    267 
    268    convert_yuv_to_rgb(b, tex,
    269                       nir_channel(b, y, 0),
    270                       nir_channel(b, uv, 0),
    271                       nir_channel(b, uv, 1));
    272 }
    273 
    274 static void
    275 lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex)
    276 {
    277    b->cursor = nir_after_instr(&tex->instr);
    278 
    279    nir_ssa_def *y = sample_plane(b, tex, 0);
    280    nir_ssa_def *u = sample_plane(b, tex, 1);
    281    nir_ssa_def *v = sample_plane(b, tex, 2);
    282 
    283    convert_yuv_to_rgb(b, tex,
    284                       nir_channel(b, y, 0),
    285                       nir_channel(b, u, 0),
    286                       nir_channel(b, v, 0));
    287 }
    288 
    289 static void
    290 lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex)
    291 {
    292    b->cursor = nir_after_instr(&tex->instr);
    293 
    294    nir_ssa_def *y = sample_plane(b, tex, 0);
    295    nir_ssa_def *xuxv = sample_plane(b, tex, 1);
    296 
    297    convert_yuv_to_rgb(b, tex,
    298                       nir_channel(b, y, 0),
    299                       nir_channel(b, xuxv, 1),
    300                       nir_channel(b, xuxv, 3));
    301 }
    302 
    303 /*
    304  * Emits a textureLod operation used to replace an existing
    305  * textureGrad instruction.
    306  */
    307 static void
    308 replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
    309 {
    310    /* We are going to emit a textureLod() with the same parameters except that
    311     * we replace ddx/ddy with lod.
    312     */
    313    int num_srcs = tex->num_srcs - 1;
    314    nir_tex_instr *txl = nir_tex_instr_create(b->shader, num_srcs);
    315 
    316    txl->op = nir_texop_txl;
    317    txl->sampler_dim = tex->sampler_dim;
    318    txl->texture_index = tex->texture_index;
    319    txl->dest_type = tex->dest_type;
    320    txl->is_array = tex->is_array;
    321    txl->is_shadow = tex->is_shadow;
    322    txl->is_new_style_shadow = tex->is_new_style_shadow;
    323    txl->sampler_index = tex->sampler_index;
    324    txl->texture = nir_deref_var_clone(tex->texture, txl);
    325    txl->sampler = nir_deref_var_clone(tex->sampler, txl);
    326    txl->coord_components = tex->coord_components;
    327 
    328    nir_ssa_dest_init(&txl->instr, &txl->dest, 4, 32, NULL);
    329 
    330    int src_num = 0;
    331    for (int i = 0; i < tex->num_srcs; i++) {
    332       if (tex->src[i].src_type == nir_tex_src_ddx ||
    333           tex->src[i].src_type == nir_tex_src_ddy)
    334          continue;
    335       nir_src_copy(&txl->src[src_num].src, &tex->src[i].src, txl);
    336       txl->src[src_num].src_type = tex->src[i].src_type;
    337       src_num++;
    338    }
    339 
    340    txl->src[src_num].src = nir_src_for_ssa(lod);
    341    txl->src[src_num].src_type = nir_tex_src_lod;
    342    src_num++;
    343 
    344    assert(src_num == num_srcs);
    345 
    346    nir_ssa_dest_init(&txl->instr, &txl->dest,
    347                      tex->dest.ssa.num_components, 32, NULL);
    348    nir_builder_instr_insert(b, &txl->instr);
    349 
    350    nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_src_for_ssa(&txl->dest.ssa));
    351 
    352    nir_instr_remove(&tex->instr);
    353 }
    354 
    355 static void
    356 lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
    357 {
    358    assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
    359    assert(tex->op == nir_texop_txd);
    360    assert(tex->dest.is_ssa);
    361 
    362    /* Use textureSize() to get the width and height of LOD 0 */
    363    nir_ssa_def *size = get_texture_size(b, tex);
    364 
    365    /* Cubemap texture lookups first generate a texture coordinate normalized
    366     * to [-1, 1] on the appropiate face. The appropiate face is determined
    367     * by which component has largest magnitude and its sign. The texture
    368     * coordinate is the quotient of the remaining texture coordinates against
    369     * that absolute value of the component of largest magnitude. This
    370     * division requires that the computing of the derivative of the texel
    371     * coordinate must use the quotient rule. The high level GLSL code is as
    372     * follows:
    373     *
    374     * Step 1: selection
    375     *
    376     * vec3 abs_p, Q, dQdx, dQdy;
    377     * abs_p = abs(ir->coordinate);
    378     * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    379     *    Q = ir->coordinate.yzx;
    380     *    dQdx = ir->lod_info.grad.dPdx.yzx;
    381     *    dQdy = ir->lod_info.grad.dPdy.yzx;
    382     * }
    383     * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    384     *    Q = ir->coordinate.xzy;
    385     *    dQdx = ir->lod_info.grad.dPdx.xzy;
    386     *    dQdy = ir->lod_info.grad.dPdy.xzy;
    387     * }
    388     * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    389     *    Q = ir->coordinate;
    390     *    dQdx = ir->lod_info.grad.dPdx;
    391     *    dQdy = ir->lod_info.grad.dPdy;
    392     * }
    393     *
    394     * Step 2: use quotient rule to compute derivative. The normalized to
    395     * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    396     * only concerned with the magnitudes of the derivatives whose values are
    397     * not affected by the sign. We drop the sign from the computation.
    398     *
    399     * vec2 dx, dy;
    400     * float recip;
    401     *
    402     * recip = 1.0 / Q.z;
    403     * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    404     * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    405     *
    406     * Step 3: compute LOD. At this point we have the derivatives of the
    407     * texture coordinates normalized to [-1,1]. We take the LOD to be
    408     *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L)
    409     *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L)
    410     *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    411     *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    412     *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    413     * where L is the dimension of the cubemap. The code is:
    414     *
    415     * float M, result;
    416     * M = max(dot(dx, dx), dot(dy, dy));
    417     * L = textureSize(sampler, 0).x;
    418     * result = -1.0 + 0.5 * log2(L * L * M);
    419     */
    420 
    421    /* coordinate */
    422    nir_ssa_def *p =
    423       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;
    424 
    425    /* unmodified dPdx, dPdy values */
    426    nir_ssa_def *dPdx =
    427       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
    428    nir_ssa_def *dPdy =
    429       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
    430 
    431    nir_ssa_def *abs_p = nir_fabs(b, p);
    432    nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
    433    nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
    434    nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);
    435 
    436    /* 1. compute selector */
    437    nir_ssa_def *Q, *dQdx, *dQdy;
    438 
    439    nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
    440    nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));
    441 
    442    unsigned yzx[4] = { 1, 2, 0, 0 };
    443    unsigned xzy[4] = { 0, 2, 1, 0 };
    444 
    445    Q = nir_bcsel(b, cond_z,
    446                  p,
    447                  nir_bcsel(b, cond_y,
    448                            nir_swizzle(b, p, xzy, 3, false),
    449                            nir_swizzle(b, p, yzx, 3, false)));
    450 
    451    dQdx = nir_bcsel(b, cond_z,
    452                     dPdx,
    453                     nir_bcsel(b, cond_y,
    454                               nir_swizzle(b, dPdx, xzy, 3, false),
    455                               nir_swizzle(b, dPdx, yzx, 3, false)));
    456 
    457    dQdy = nir_bcsel(b, cond_z,
    458                     dPdy,
    459                     nir_bcsel(b, cond_y,
    460                               nir_swizzle(b, dPdy, xzy, 3, false),
    461                               nir_swizzle(b, dPdy, yzx, 3, false)));
    462 
    463    /* 2. quotient rule */
    464 
    465    /* tmp = Q.xy * recip;
    466     * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    467     * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    468     */
    469    nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));
    470 
    471    unsigned xy[4] = { 0, 1, 0, 0 };
    472    nir_ssa_def *Q_xy = nir_swizzle(b, Q, xy, 2, false);
    473    nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);
    474 
    475    nir_ssa_def *dQdx_xy = nir_swizzle(b, dQdx, xy, 2, false);
    476    nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
    477    nir_ssa_def *dx =
    478       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));
    479 
    480    nir_ssa_def *dQdy_xy = nir_swizzle(b, dQdy, xy, 2, false);
    481    nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
    482    nir_ssa_def *dy =
    483       nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));
    484 
    485    /* M = max(dot(dx, dx), dot(dy, dy)); */
    486    nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));
    487 
    488    /* size has textureSize() of LOD 0 */
    489    nir_ssa_def *L = nir_channel(b, size, 0);
    490 
    491    /* lod = -1.0 + 0.5 * log2(L * L * M); */
    492    nir_ssa_def *lod =
    493       nir_fadd(b,
    494                nir_imm_float(b, -1.0f),
    495                nir_fmul(b,
    496                         nir_imm_float(b, 0.5f),
    497                         nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));
    498 
    499    /* 3. Replace the gradient instruction with an equivalent lod instruction */
    500    replace_gradient_with_lod(b, lod, tex);
    501 }
    502 
    503 static void
    504 lower_gradient_shadow(nir_builder *b, nir_tex_instr *tex)
    505 {
    506    assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
    507    assert(tex->is_shadow);
    508    assert(tex->op == nir_texop_txd);
    509    assert(tex->dest.is_ssa);
    510 
    511    /* Use textureSize() to get the width and height of LOD 0 */
    512    unsigned component_mask;
    513    switch (tex->sampler_dim) {
    514    case GLSL_SAMPLER_DIM_3D:
    515       component_mask = 7;
    516       break;
    517    case GLSL_SAMPLER_DIM_1D:
    518       component_mask = 1;
    519       break;
    520    default:
    521       component_mask = 3;
    522       break;
    523    }
    524 
    525    nir_ssa_def *size =
    526       nir_channels(b, get_texture_size(b, tex), component_mask);
    527 
    528    /* Scale the gradients by width and height.  Effectively, the incoming
    529     * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    530     * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    531     */
    532    nir_ssa_def *ddx =
    533       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
    534    nir_ssa_def *ddy =
    535       tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;
    536 
    537    nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
    538    nir_ssa_def *dPdy = nir_fmul(b, ddy, size);
    539 
    540    nir_ssa_def *rho;
    541    if (dPdx->num_components == 1) {
    542       rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
    543    } else {
    544       rho = nir_fmax(b,
    545                      nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
    546                      nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
    547    }
    548 
    549    /* lod = log2(rho).  We're ignoring GL state biases for now. */
    550    nir_ssa_def *lod = nir_flog2(b, rho);
    551 
    552    /* Replace the gradient instruction with an equivalent lod instruction */
    553    replace_gradient_with_lod(b, lod, tex);
    554 }
    555 
    556 static void
    557 saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
    558 {
    559    b->cursor = nir_before_instr(&tex->instr);
    560 
    561    /* Walk through the sources saturating the requested arguments. */
    562    for (unsigned i = 0; i < tex->num_srcs; i++) {
    563       if (tex->src[i].src_type != nir_tex_src_coord)
    564          continue;
    565 
    566       nir_ssa_def *src =
    567          nir_ssa_for_src(b, tex->src[i].src, tex->coord_components);
    568 
    569       /* split src into components: */
    570       nir_ssa_def *comp[4];
    571 
    572       assume(tex->coord_components >= 1);
    573 
    574       for (unsigned j = 0; j < tex->coord_components; j++)
    575          comp[j] = nir_channel(b, src, j);
    576 
    577       /* clamp requested components, array index does not get clamped: */
    578       unsigned ncomp = tex->coord_components;
    579       if (tex->is_array)
    580          ncomp--;
    581 
    582       for (unsigned j = 0; j < ncomp; j++) {
    583          if ((1 << j) & sat_mask) {
    584             if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
    585                /* non-normalized texture coords, so clamp to texture
    586                 * size rather than [0.0, 1.0]
    587                 */
    588                nir_ssa_def *txs = get_texture_size(b, tex);
    589                comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
    590                comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
    591             } else {
    592                comp[j] = nir_fsat(b, comp[j]);
    593             }
    594          }
    595       }
    596 
    597       /* and move the result back into a single vecN: */
    598       src = nir_vec(b, comp, tex->coord_components);
    599 
    600       nir_instr_rewrite_src(&tex->instr,
    601                             &tex->src[i].src,
    602                             nir_src_for_ssa(src));
    603    }
    604 }
    605 
    606 static nir_ssa_def *
    607 get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
    608 {
    609    nir_const_value v;
    610 
    611    memset(&v, 0, sizeof(v));
    612 
    613    if (swizzle_val == 4) {
    614       v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
    615    } else {
    616       assert(swizzle_val == 5);
    617       if (type == nir_type_float)
    618          v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
    619       else
    620          v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
    621    }
    622 
    623    return nir_build_imm(b, 4, 32, v);
    624 }
    625 
    626 static void
    627 swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
    628 {
    629    assert(tex->dest.is_ssa);
    630 
    631    b->cursor = nir_after_instr(&tex->instr);
    632 
    633    nir_ssa_def *swizzled;
    634    if (tex->op == nir_texop_tg4) {
    635       if (swizzle[tex->component] < 4) {
    636          /* This one's easy */
    637          tex->component = swizzle[tex->component];
    638          return;
    639       } else {
    640          swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
    641       }
    642    } else {
    643       assert(nir_tex_instr_dest_size(tex) == 4);
    644       if (swizzle[0] < 4 && swizzle[1] < 4 &&
    645           swizzle[2] < 4 && swizzle[3] < 4) {
    646          unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
    647          /* We have no 0's or 1's, just emit a swizzling MOV */
    648          swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false);
    649       } else {
    650          nir_ssa_def *srcs[4];
    651          for (unsigned i = 0; i < 4; i++) {
    652             if (swizzle[i] < 4) {
    653                srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]);
    654             } else {
    655                srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]);
    656             }
    657          }
    658          swizzled = nir_vec(b, srcs, 4);
    659       }
    660    }
    661 
    662    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled),
    663                                   swizzled->parent_instr);
    664 }
    665 
    666 static void
    667 linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
    668 {
    669    assert(tex->dest.is_ssa);
    670    assert(nir_tex_instr_dest_size(tex) == 4);
    671    assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
    672 
    673    b->cursor = nir_after_instr(&tex->instr);
    674 
    675    static const unsigned swiz[4] = {0, 1, 2, 0};
    676    nir_ssa_def *comp = nir_swizzle(b, &tex->dest.ssa, swiz, 3, true);
    677 
    678    /* Formula is:
    679     *    (comp <= 0.04045) ?
    680     *          (comp / 12.92) :
    681     *          pow((comp + 0.055) / 1.055, 2.4)
    682     */
    683    nir_ssa_def *low  = nir_fmul(b, comp, nir_imm_float(b, 1.0 / 12.92));
    684    nir_ssa_def *high = nir_fpow(b,
    685                                 nir_fmul(b,
    686                                          nir_fadd(b,
    687                                                   comp,
    688                                                   nir_imm_float(b, 0.055)),
    689                                          nir_imm_float(b, 1.0 / 1.055)),
    690                                 nir_imm_float(b, 2.4));
    691    nir_ssa_def *cond = nir_fge(b, nir_imm_float(b, 0.04045), comp);
    692    nir_ssa_def *rgb  = nir_bcsel(b, cond, low, high);
    693 
    694    /* alpha is untouched: */
    695    nir_ssa_def *result = nir_vec4(b,
    696                                   nir_channel(b, rgb, 0),
    697                                   nir_channel(b, rgb, 1),
    698                                   nir_channel(b, rgb, 2),
    699                                   nir_channel(b, &tex->dest.ssa, 3));
    700 
    701    nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(result),
    702                                   result->parent_instr);
    703 }
    704 
    705 static bool
    706 nir_lower_tex_block(nir_block *block, nir_builder *b,
    707                     const nir_lower_tex_options *options)
    708 {
    709    bool progress = false;
    710 
    711    nir_foreach_instr_safe(instr, block) {
    712       if (instr->type != nir_instr_type_tex)
    713          continue;
    714 
    715       nir_tex_instr *tex = nir_instr_as_tex(instr);
    716       bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
    717 
    718       /* mask of src coords to saturate (clamp): */
    719       unsigned sat_mask = 0;
    720 
    721       if ((1 << tex->sampler_index) & options->saturate_r)
    722          sat_mask |= (1 << 2);    /* .z */
    723       if ((1 << tex->sampler_index) & options->saturate_t)
    724          sat_mask |= (1 << 1);    /* .y */
    725       if ((1 << tex->sampler_index) & options->saturate_s)
    726          sat_mask |= (1 << 0);    /* .x */
    727 
    728       /* If we are clamping any coords, we must lower projector first
    729        * as clamping happens *after* projection:
    730        */
    731       if (lower_txp || sat_mask) {
    732          project_src(b, tex);
    733          progress = true;
    734       }
    735 
    736       if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
    737           (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
    738            options->lower_rect_offset)) {
    739          progress = lower_offset(b, tex) || progress;
    740       }
    741 
    742       if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
    743          lower_rect(b, tex);
    744          progress = true;
    745       }
    746 
    747       if ((1 << tex->texture_index) & options->lower_y_uv_external) {
    748          lower_y_uv_external(b, tex);
    749          progress = true;
    750       }
    751 
    752       if ((1 << tex->texture_index) & options->lower_y_u_v_external) {
    753          lower_y_u_v_external(b, tex);
    754          progress = true;
    755       }
    756 
    757       if ((1 << tex->texture_index) & options->lower_yx_xuxv_external) {
    758          lower_yx_xuxv_external(b, tex);
    759          progress = true;
    760       }
    761 
    762 
    763       if (sat_mask) {
    764          saturate_src(b, tex, sat_mask);
    765          progress = true;
    766       }
    767 
    768       if (((1 << tex->texture_index) & options->swizzle_result) &&
    769           !nir_tex_instr_is_query(tex) &&
    770           !(tex->is_shadow && tex->is_new_style_shadow)) {
    771          swizzle_result(b, tex, options->swizzles[tex->texture_index]);
    772          progress = true;
    773       }
    774 
    775       /* should be after swizzle so we know which channels are rgb: */
    776       if (((1 << tex->texture_index) & options->lower_srgb) &&
    777           !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
    778          linearize_srgb_result(b, tex);
    779          progress = true;
    780       }
    781 
    782       if (tex->op == nir_texop_txd &&
    783           tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
    784           (options->lower_txd_cube_map ||
    785            (tex->is_shadow && options->lower_txd_shadow))) {
    786          lower_gradient_cube_map(b, tex);
    787          progress = true;
    788          continue;
    789       }
    790 
    791       if (tex->op == nir_texop_txd && options->lower_txd_shadow &&
    792           tex->is_shadow && tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
    793          lower_gradient_shadow(b, tex);
    794          progress = true;
    795          continue;
    796       }
    797    }
    798 
    799    return progress;
    800 }
    801 
    802 static bool
    803 nir_lower_tex_impl(nir_function_impl *impl,
    804                    const nir_lower_tex_options *options)
    805 {
    806    bool progress = false;
    807    nir_builder builder;
    808    nir_builder_init(&builder, impl);
    809 
    810    nir_foreach_block(block, impl) {
    811       progress |= nir_lower_tex_block(block, &builder, options);
    812    }
    813 
    814    nir_metadata_preserve(impl, nir_metadata_block_index |
    815                                nir_metadata_dominance);
    816    return progress;
    817 }
    818 
    819 bool
    820 nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
    821 {
    822    bool progress = false;
    823 
    824    nir_foreach_function(function, shader) {
    825       if (function->impl)
    826          progress |= nir_lower_tex_impl(function->impl, options);
    827    }
    828 
    829    return progress;
    830 }
    831