Home | History | Annotate | Download | only in spirv
      1 /*
      2  * Copyright  2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Jason Ekstrand (jason (at) jlekstrand.net)
     25  *
     26  */
     27 
     28 #include "vtn_private.h"
     29 #include "GLSL.std.450.h"
     30 
/* Float-cast versions of M_PI, M_PI_2 and M_PI_4, used below when building
 * single-precision immediates with nir_imm_float().
 */
#define M_PIf   ((float) M_PI)
#define M_PI_2f ((float) M_PI_2)
#define M_PI_4f ((float) M_PI_4)
     34 
     35 static nir_ssa_def *
     36 build_mat2_det(nir_builder *b, nir_ssa_def *col[2])
     37 {
     38    unsigned swiz[4] = {1, 0, 0, 0};
     39    nir_ssa_def *p = nir_fmul(b, col[0], nir_swizzle(b, col[1], swiz, 2, true));
     40    return nir_fsub(b, nir_channel(b, p, 0), nir_channel(b, p, 1));
     41 }
     42 
     43 static nir_ssa_def *
     44 build_mat3_det(nir_builder *b, nir_ssa_def *col[3])
     45 {
     46    unsigned yzx[4] = {1, 2, 0, 0};
     47    unsigned zxy[4] = {2, 0, 1, 0};
     48 
     49    nir_ssa_def *prod0 =
     50       nir_fmul(b, col[0],
     51                nir_fmul(b, nir_swizzle(b, col[1], yzx, 3, true),
     52                            nir_swizzle(b, col[2], zxy, 3, true)));
     53    nir_ssa_def *prod1 =
     54       nir_fmul(b, col[0],
     55                nir_fmul(b, nir_swizzle(b, col[1], zxy, 3, true),
     56                            nir_swizzle(b, col[2], yzx, 3, true)));
     57 
     58    nir_ssa_def *diff = nir_fsub(b, prod0, prod1);
     59 
     60    return nir_fadd(b, nir_channel(b, diff, 0),
     61                       nir_fadd(b, nir_channel(b, diff, 1),
     62                                   nir_channel(b, diff, 2)));
     63 }
     64 
     65 static nir_ssa_def *
     66 build_mat4_det(nir_builder *b, nir_ssa_def **col)
     67 {
     68    nir_ssa_def *subdet[4];
     69    for (unsigned i = 0; i < 4; i++) {
     70       unsigned swiz[3];
     71       for (unsigned j = 0; j < 3; j++)
     72          swiz[j] = j + (j >= i);
     73 
     74       nir_ssa_def *subcol[3];
     75       subcol[0] = nir_swizzle(b, col[1], swiz, 3, true);
     76       subcol[1] = nir_swizzle(b, col[2], swiz, 3, true);
     77       subcol[2] = nir_swizzle(b, col[3], swiz, 3, true);
     78 
     79       subdet[i] = build_mat3_det(b, subcol);
     80    }
     81 
     82    nir_ssa_def *prod = nir_fmul(b, col[0], nir_vec(b, subdet, 4));
     83 
     84    return nir_fadd(b, nir_fsub(b, nir_channel(b, prod, 0),
     85                                   nir_channel(b, prod, 1)),
     86                       nir_fsub(b, nir_channel(b, prod, 2),
     87                                   nir_channel(b, prod, 3)));
     88 }
     89 
     90 static nir_ssa_def *
     91 build_mat_det(struct vtn_builder *b, struct vtn_ssa_value *src)
     92 {
     93    unsigned size = glsl_get_vector_elements(src->type);
     94 
     95    nir_ssa_def *cols[4];
     96    for (unsigned i = 0; i < size; i++)
     97       cols[i] = src->elems[i]->def;
     98 
     99    switch(size) {
    100    case 2: return build_mat2_det(&b->nb, cols);
    101    case 3: return build_mat3_det(&b->nb, cols);
    102    case 4: return build_mat4_det(&b->nb, cols);
    103    default:
    104       unreachable("Invalid matrix size");
    105    }
    106 }
    107 
    108 /* Computes the determinate of the submatrix given by taking src and
    109  * removing the specified row and column.
    110  */
    111 static nir_ssa_def *
    112 build_mat_subdet(struct nir_builder *b, struct vtn_ssa_value *src,
    113                  unsigned size, unsigned row, unsigned col)
    114 {
    115    assert(row < size && col < size);
    116    if (size == 2) {
    117       return nir_channel(b, src->elems[1 - col]->def, 1 - row);
    118    } else {
    119       /* Swizzle to get all but the specified row */
    120       unsigned swiz[3];
    121       for (unsigned j = 0; j < 3; j++)
    122          swiz[j] = j + (j >= row);
    123 
    124       /* Grab all but the specified column */
    125       nir_ssa_def *subcol[3];
    126       for (unsigned j = 0; j < size; j++) {
    127          if (j != col) {
    128             subcol[j - (j > col)] = nir_swizzle(b, src->elems[j]->def,
    129                                                 swiz, size - 1, true);
    130          }
    131       }
    132 
    133       if (size == 3) {
    134          return build_mat2_det(b, subcol);
    135       } else {
    136          assert(size == 4);
    137          return build_mat3_det(b, subcol);
    138       }
    139    }
    140 }
    141 
    142 static struct vtn_ssa_value *
    143 matrix_inverse(struct vtn_builder *b, struct vtn_ssa_value *src)
    144 {
    145    nir_ssa_def *adj_col[4];
    146    unsigned size = glsl_get_vector_elements(src->type);
    147 
    148    /* Build up an adjugate matrix */
    149    for (unsigned c = 0; c < size; c++) {
    150       nir_ssa_def *elem[4];
    151       for (unsigned r = 0; r < size; r++) {
    152          elem[r] = build_mat_subdet(&b->nb, src, size, c, r);
    153 
    154          if ((r + c) % 2)
    155             elem[r] = nir_fneg(&b->nb, elem[r]);
    156       }
    157 
    158       adj_col[c] = nir_vec(&b->nb, elem, size);
    159    }
    160 
    161    nir_ssa_def *det_inv = nir_frcp(&b->nb, build_mat_det(b, src));
    162 
    163    struct vtn_ssa_value *val = vtn_create_ssa_value(b, src->type);
    164    for (unsigned i = 0; i < size; i++)
    165       val->elems[i]->def = nir_fmul(&b->nb, adj_col[i], det_inv);
    166 
    167    return val;
    168 }
    169 
    170 static nir_ssa_def*
    171 build_length(nir_builder *b, nir_ssa_def *vec)
    172 {
    173    switch (vec->num_components) {
    174    case 1: return nir_fsqrt(b, nir_fmul(b, vec, vec));
    175    case 2: return nir_fsqrt(b, nir_fdot2(b, vec, vec));
    176    case 3: return nir_fsqrt(b, nir_fdot3(b, vec, vec));
    177    case 4: return nir_fsqrt(b, nir_fdot4(b, vec, vec));
    178    default:
    179       unreachable("Invalid number of components");
    180    }
    181 }
    182 
    183 static inline nir_ssa_def *
    184 build_fclamp(nir_builder *b,
    185              nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
    186 {
    187    return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
    188 }
    189 
    190 /**
    191  * Return e^x.
    192  */
    193 static nir_ssa_def *
    194 build_exp(nir_builder *b, nir_ssa_def *x)
    195 {
    196    return nir_fexp2(b, nir_fmul(b, x, nir_imm_float(b, M_LOG2E)));
    197 }
    198 
    199 /**
    200  * Return ln(x) - the natural logarithm of x.
    201  */
    202 static nir_ssa_def *
    203 build_log(nir_builder *b, nir_ssa_def *x)
    204 {
    205    return nir_fmul(b, nir_flog2(b, x), nir_imm_float(b, 1.0 / M_LOG2E));
    206 }
    207 
    208 /**
    209  * Approximate asin(x) by the formula:
    210  *    asin~(x) = sign(x) * (pi/2 - sqrt(1 - |x|) * (pi/2 + |x|(pi/4 - 1 + |x|(p0 + |x|p1))))
    211  *
    212  * which is correct to first order at x=0 and x=1 regardless of the p
    213  * coefficients but can be made second-order correct at both ends by selecting
    214  * the fit coefficients appropriately.  Different p coefficients can be used
    215  * in the asin and acos implementation to minimize some relative error metric
    216  * in each case.
    217  */
    218 static nir_ssa_def *
    219 build_asin(nir_builder *b, nir_ssa_def *x, float p0, float p1)
    220 {
    221    nir_ssa_def *abs_x = nir_fabs(b, x);
    222    return nir_fmul(b, nir_fsign(b, x),
    223                    nir_fsub(b, nir_imm_float(b, M_PI_2f),
    224                             nir_fmul(b, nir_fsqrt(b, nir_fsub(b, nir_imm_float(b, 1.0f), abs_x)),
    225                                      nir_fadd(b, nir_imm_float(b, M_PI_2f),
    226                                               nir_fmul(b, abs_x,
    227                                                        nir_fadd(b, nir_imm_float(b, M_PI_4f - 1.0f),
    228                                                                 nir_fmul(b, abs_x,
    229                                                                          nir_fadd(b, nir_imm_float(b, p0),
    230                                                                                   nir_fmul(b, abs_x,
    231                                                                                            nir_imm_float(b, p1))))))))));
    232 }
    233 
    234 /**
    235  * Compute xs[0] + xs[1] + xs[2] + ... using fadd.
    236  */
    237 static nir_ssa_def *
    238 build_fsum(nir_builder *b, nir_ssa_def **xs, int terms)
    239 {
    240    nir_ssa_def *accum = xs[0];
    241 
    242    for (int i = 1; i < terms; i++)
    243       accum = nir_fadd(b, accum, xs[i]);
    244 
    245    return accum;
    246 }
    247 
    248 static nir_ssa_def *
    249 build_atan(nir_builder *b, nir_ssa_def *y_over_x)
    250 {
    251    nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x);
    252    nir_ssa_def *one = nir_imm_float(b, 1.0f);
    253 
    254    /*
    255     * range-reduction, first step:
    256     *
    257     *      / y_over_x         if |y_over_x| <= 1.0;
    258     * x = <
    259     *      \ 1.0 / y_over_x   otherwise
    260     */
    261    nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one),
    262                                 nir_fmax(b, abs_y_over_x, one));
    263 
    264    /*
    265     * approximate atan by evaluating polynomial:
    266     *
    267     * x   * 0.9999793128310355 - x^3  * 0.3326756418091246 +
    268     * x^5 * 0.1938924977115610 - x^7  * 0.1173503194786851 +
    269     * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444
    270     */
    271    nir_ssa_def *x_2  = nir_fmul(b, x,   x);
    272    nir_ssa_def *x_3  = nir_fmul(b, x_2, x);
    273    nir_ssa_def *x_5  = nir_fmul(b, x_3, x_2);
    274    nir_ssa_def *x_7  = nir_fmul(b, x_5, x_2);
    275    nir_ssa_def *x_9  = nir_fmul(b, x_7, x_2);
    276    nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2);
    277 
    278    nir_ssa_def *polynomial_terms[] = {
    279       nir_fmul(b, x,    nir_imm_float(b,  0.9999793128310355f)),
    280       nir_fmul(b, x_3,  nir_imm_float(b, -0.3326756418091246f)),
    281       nir_fmul(b, x_5,  nir_imm_float(b,  0.1938924977115610f)),
    282       nir_fmul(b, x_7,  nir_imm_float(b, -0.1173503194786851f)),
    283       nir_fmul(b, x_9,  nir_imm_float(b,  0.0536813784310406f)),
    284       nir_fmul(b, x_11, nir_imm_float(b, -0.0121323213173444f)),
    285    };
    286 
    287    nir_ssa_def *tmp =
    288       build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms));
    289 
    290    /* range-reduction fixup */
    291    tmp = nir_fadd(b, tmp,
    292                   nir_fmul(b,
    293                            nir_b2f(b, nir_flt(b, one, abs_y_over_x)),
    294                            nir_fadd(b, nir_fmul(b, tmp,
    295                                                 nir_imm_float(b, -2.0f)),
    296                                        nir_imm_float(b, M_PI_2f))));
    297 
    298    /* sign fixup */
    299    return nir_fmul(b, tmp, nir_fsign(b, y_over_x));
    300 }
    301 
    302 static nir_ssa_def *
    303 build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
    304 {
    305    nir_ssa_def *zero = nir_imm_float(b, 0.0f);
    306 
    307    /* If |x| >= 1.0e-8 * |y|: */
    308    nir_ssa_def *condition =
    309       nir_fge(b, nir_fabs(b, x),
    310               nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
    311 
    312    /* Then...call atan(y/x) and fix it up: */
    313    nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
    314    nir_ssa_def *r_then =
    315       nir_bcsel(b, nir_flt(b, x, zero),
    316                    nir_fadd(b, atan1,
    317                                nir_bcsel(b, nir_fge(b, y, zero),
    318                                             nir_imm_float(b, M_PIf),
    319                                             nir_imm_float(b, -M_PIf))),
    320                    atan1);
    321 
    322    /* Else... */
    323    nir_ssa_def *r_else =
    324       nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
    325 
    326    return nir_bcsel(b, condition, r_then, r_else);
    327 }
    328 
    329 static nir_ssa_def *
    330 build_frexp(nir_builder *b, nir_ssa_def *x, nir_ssa_def **exponent)
    331 {
    332    nir_ssa_def *abs_x = nir_fabs(b, x);
    333    nir_ssa_def *zero = nir_imm_float(b, 0.0f);
    334 
    335    /* Single-precision floating-point values are stored as
    336     *   1 sign bit;
    337     *   8 exponent bits;
    338     *   23 mantissa bits.
    339     *
    340     * An exponent shift of 23 will shift the mantissa out, leaving only the
    341     * exponent and sign bit (which itself may be zero, if the absolute value
    342     * was taken before the bitcast and shift.
    343     */
    344    nir_ssa_def *exponent_shift = nir_imm_int(b, 23);
    345    nir_ssa_def *exponent_bias = nir_imm_int(b, -126);
    346 
    347    nir_ssa_def *sign_mantissa_mask = nir_imm_int(b, 0x807fffffu);
    348 
    349    /* Exponent of floating-point values in the range [0.5, 1.0). */
    350    nir_ssa_def *exponent_value = nir_imm_int(b, 0x3f000000u);
    351 
    352    nir_ssa_def *is_not_zero = nir_fne(b, abs_x, zero);
    353 
    354    *exponent =
    355       nir_iadd(b, nir_ushr(b, abs_x, exponent_shift),
    356                   nir_bcsel(b, is_not_zero, exponent_bias, zero));
    357 
    358    return nir_ior(b, nir_iand(b, x, sign_mantissa_mask),
    359                      nir_bcsel(b, is_not_zero, exponent_value, zero));
    360 }
    361 
    362 static nir_op
    363 vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode)
    364 {
    365    switch (opcode) {
    366    case GLSLstd450Round:         return nir_op_fround_even;
    367    case GLSLstd450RoundEven:     return nir_op_fround_even;
    368    case GLSLstd450Trunc:         return nir_op_ftrunc;
    369    case GLSLstd450FAbs:          return nir_op_fabs;
    370    case GLSLstd450SAbs:          return nir_op_iabs;
    371    case GLSLstd450FSign:         return nir_op_fsign;
    372    case GLSLstd450SSign:         return nir_op_isign;
    373    case GLSLstd450Floor:         return nir_op_ffloor;
    374    case GLSLstd450Ceil:          return nir_op_fceil;
    375    case GLSLstd450Fract:         return nir_op_ffract;
    376    case GLSLstd450Sin:           return nir_op_fsin;
    377    case GLSLstd450Cos:           return nir_op_fcos;
    378    case GLSLstd450Pow:           return nir_op_fpow;
    379    case GLSLstd450Exp2:          return nir_op_fexp2;
    380    case GLSLstd450Log2:          return nir_op_flog2;
    381    case GLSLstd450Sqrt:          return nir_op_fsqrt;
    382    case GLSLstd450InverseSqrt:   return nir_op_frsq;
    383    case GLSLstd450FMin:          return nir_op_fmin;
    384    case GLSLstd450UMin:          return nir_op_umin;
    385    case GLSLstd450SMin:          return nir_op_imin;
    386    case GLSLstd450FMax:          return nir_op_fmax;
    387    case GLSLstd450UMax:          return nir_op_umax;
    388    case GLSLstd450SMax:          return nir_op_imax;
    389    case GLSLstd450FMix:          return nir_op_flrp;
    390    case GLSLstd450Fma:           return nir_op_ffma;
    391    case GLSLstd450Ldexp:         return nir_op_ldexp;
    392    case GLSLstd450FindILsb:      return nir_op_find_lsb;
    393    case GLSLstd450FindSMsb:      return nir_op_ifind_msb;
    394    case GLSLstd450FindUMsb:      return nir_op_ufind_msb;
    395 
    396    /* Packing/Unpacking functions */
    397    case GLSLstd450PackSnorm4x8:     return nir_op_pack_snorm_4x8;
    398    case GLSLstd450PackUnorm4x8:     return nir_op_pack_unorm_4x8;
    399    case GLSLstd450PackSnorm2x16:    return nir_op_pack_snorm_2x16;
    400    case GLSLstd450PackUnorm2x16:    return nir_op_pack_unorm_2x16;
    401    case GLSLstd450PackHalf2x16:     return nir_op_pack_half_2x16;
    402    case GLSLstd450PackDouble2x32:   return nir_op_pack_double_2x32;
    403    case GLSLstd450UnpackSnorm4x8:   return nir_op_unpack_snorm_4x8;
    404    case GLSLstd450UnpackUnorm4x8:   return nir_op_unpack_unorm_4x8;
    405    case GLSLstd450UnpackSnorm2x16:  return nir_op_unpack_snorm_2x16;
    406    case GLSLstd450UnpackUnorm2x16:  return nir_op_unpack_unorm_2x16;
    407    case GLSLstd450UnpackHalf2x16:   return nir_op_unpack_half_2x16;
    408    case GLSLstd450UnpackDouble2x32: return nir_op_unpack_double_2x32;
    409 
    410    default:
    411       unreachable("No NIR equivalent");
    412    }
    413 }
    414 
    415 static void
    416 handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
    417                    const uint32_t *w, unsigned count)
    418 {
    419    struct nir_builder *nb = &b->nb;
    420    const struct glsl_type *dest_type =
    421       vtn_value(b, w[1], vtn_value_type_type)->type->type;
    422 
    423    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    424    val->ssa = vtn_create_ssa_value(b, dest_type);
    425 
    426    /* Collect the various SSA sources */
    427    unsigned num_inputs = count - 5;
    428    nir_ssa_def *src[3] = { NULL, };
    429    for (unsigned i = 0; i < num_inputs; i++)
    430       src[i] = vtn_ssa_value(b, w[i + 5])->def;
    431 
    432    switch (entrypoint) {
    433    case GLSLstd450Radians:
    434       val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 0.01745329251));
    435       return;
    436    case GLSLstd450Degrees:
    437       val->ssa->def = nir_fmul(nb, src[0], nir_imm_float(nb, 57.2957795131));
    438       return;
    439    case GLSLstd450Tan:
    440       val->ssa->def = nir_fdiv(nb, nir_fsin(nb, src[0]),
    441                                nir_fcos(nb, src[0]));
    442       return;
    443 
    444    case GLSLstd450Modf: {
    445       nir_ssa_def *sign = nir_fsign(nb, src[0]);
    446       nir_ssa_def *abs = nir_fabs(nb, src[0]);
    447       val->ssa->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
    448       nir_store_deref_var(nb, vtn_nir_deref(b, w[6]),
    449                           nir_fmul(nb, sign, nir_ffloor(nb, abs)), 0xf);
    450       return;
    451    }
    452 
    453    case GLSLstd450ModfStruct: {
    454       nir_ssa_def *sign = nir_fsign(nb, src[0]);
    455       nir_ssa_def *abs = nir_fabs(nb, src[0]);
    456       assert(glsl_type_is_struct(val->ssa->type));
    457       val->ssa->elems[0]->def = nir_fmul(nb, sign, nir_ffract(nb, abs));
    458       val->ssa->elems[1]->def = nir_fmul(nb, sign, nir_ffloor(nb, abs));
    459       return;
    460    }
    461 
    462    case GLSLstd450Step:
    463       val->ssa->def = nir_sge(nb, src[1], src[0]);
    464       return;
    465 
    466    case GLSLstd450Length:
    467       val->ssa->def = build_length(nb, src[0]);
    468       return;
    469    case GLSLstd450Distance:
    470       val->ssa->def = build_length(nb, nir_fsub(nb, src[0], src[1]));
    471       return;
    472    case GLSLstd450Normalize:
    473       val->ssa->def = nir_fdiv(nb, src[0], build_length(nb, src[0]));
    474       return;
    475 
    476    case GLSLstd450Exp:
    477       val->ssa->def = build_exp(nb, src[0]);
    478       return;
    479 
    480    case GLSLstd450Log:
    481       val->ssa->def = build_log(nb, src[0]);
    482       return;
    483 
    484    case GLSLstd450FClamp:
    485       val->ssa->def = build_fclamp(nb, src[0], src[1], src[2]);
    486       return;
    487    case GLSLstd450UClamp:
    488       val->ssa->def = nir_umin(nb, nir_umax(nb, src[0], src[1]), src[2]);
    489       return;
    490    case GLSLstd450SClamp:
    491       val->ssa->def = nir_imin(nb, nir_imax(nb, src[0], src[1]), src[2]);
    492       return;
    493 
    494    case GLSLstd450Cross: {
    495       unsigned yzx[4] = { 1, 2, 0, 0 };
    496       unsigned zxy[4] = { 2, 0, 1, 0 };
    497       val->ssa->def =
    498          nir_fsub(nb, nir_fmul(nb, nir_swizzle(nb, src[0], yzx, 3, true),
    499                                    nir_swizzle(nb, src[1], zxy, 3, true)),
    500                       nir_fmul(nb, nir_swizzle(nb, src[0], zxy, 3, true),
    501                                    nir_swizzle(nb, src[1], yzx, 3, true)));
    502       return;
    503    }
    504 
    505    case GLSLstd450SmoothStep: {
    506       /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */
    507       nir_ssa_def *t =
    508          build_fclamp(nb, nir_fdiv(nb, nir_fsub(nb, src[2], src[0]),
    509                                        nir_fsub(nb, src[1], src[0])),
    510                           nir_imm_float(nb, 0.0), nir_imm_float(nb, 1.0));
    511       /* result = t * t * (3 - 2 * t) */
    512       val->ssa->def =
    513          nir_fmul(nb, t, nir_fmul(nb, t,
    514             nir_fsub(nb, nir_imm_float(nb, 3.0),
    515                          nir_fmul(nb, nir_imm_float(nb, 2.0), t))));
    516       return;
    517    }
    518 
    519    case GLSLstd450FaceForward:
    520       val->ssa->def =
    521          nir_bcsel(nb, nir_flt(nb, nir_fdot(nb, src[2], src[1]),
    522                                    nir_imm_float(nb, 0.0)),
    523                        src[0], nir_fneg(nb, src[0]));
    524       return;
    525 
    526    case GLSLstd450Reflect:
    527       /* I - 2 * dot(N, I) * N */
    528       val->ssa->def =
    529          nir_fsub(nb, src[0], nir_fmul(nb, nir_imm_float(nb, 2.0),
    530                               nir_fmul(nb, nir_fdot(nb, src[0], src[1]),
    531                                            src[1])));
    532       return;
    533 
    534    case GLSLstd450Refract: {
    535       nir_ssa_def *I = src[0];
    536       nir_ssa_def *N = src[1];
    537       nir_ssa_def *eta = src[2];
    538       nir_ssa_def *n_dot_i = nir_fdot(nb, N, I);
    539       nir_ssa_def *one = nir_imm_float(nb, 1.0);
    540       nir_ssa_def *zero = nir_imm_float(nb, 0.0);
    541       /* k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) */
    542       nir_ssa_def *k =
    543          nir_fsub(nb, one, nir_fmul(nb, eta, nir_fmul(nb, eta,
    544                       nir_fsub(nb, one, nir_fmul(nb, n_dot_i, n_dot_i)))));
    545       nir_ssa_def *result =
    546          nir_fsub(nb, nir_fmul(nb, eta, I),
    547                       nir_fmul(nb, nir_fadd(nb, nir_fmul(nb, eta, n_dot_i),
    548                                                 nir_fsqrt(nb, k)), N));
    549       /* XXX: bcsel, or if statement? */
    550       val->ssa->def = nir_bcsel(nb, nir_flt(nb, k, zero), zero, result);
    551       return;
    552    }
    553 
    554    case GLSLstd450Sinh:
    555       /* 0.5 * (e^x - e^(-x)) */
    556       val->ssa->def =
    557          nir_fmul(nb, nir_imm_float(nb, 0.5f),
    558                       nir_fsub(nb, build_exp(nb, src[0]),
    559                                    build_exp(nb, nir_fneg(nb, src[0]))));
    560       return;
    561 
    562    case GLSLstd450Cosh:
    563       /* 0.5 * (e^x + e^(-x)) */
    564       val->ssa->def =
    565          nir_fmul(nb, nir_imm_float(nb, 0.5f),
    566                       nir_fadd(nb, build_exp(nb, src[0]),
    567                                    build_exp(nb, nir_fneg(nb, src[0]))));
    568       return;
    569 
    570    case GLSLstd450Tanh: {
    571       /* tanh(x) := (0.5 * (e^x - e^(-x))) / (0.5 * (e^x + e^(-x)))
    572        *
    573        * With a little algebra this reduces to (e^2x - 1) / (e^2x + 1)
    574        *
    575        * We clamp x to (-inf, +10] to avoid precision problems.  When x > 10,
    576        * e^2x is so much larger than 1.0 that 1.0 gets flushed to zero in the
    577        * computation e^2x +/- 1 so it can be ignored.
    578        */
    579       nir_ssa_def *x = nir_fmin(nb, src[0], nir_imm_float(nb, 10));
    580       nir_ssa_def *exp2x = build_exp(nb, nir_fmul(nb, x, nir_imm_float(nb, 2)));
    581       val->ssa->def = nir_fdiv(nb, nir_fsub(nb, exp2x, nir_imm_float(nb, 1)),
    582                                    nir_fadd(nb, exp2x, nir_imm_float(nb, 1)));
    583       return;
    584    }
    585 
    586    case GLSLstd450Asinh:
    587       val->ssa->def = nir_fmul(nb, nir_fsign(nb, src[0]),
    588          build_log(nb, nir_fadd(nb, nir_fabs(nb, src[0]),
    589                        nir_fsqrt(nb, nir_fadd(nb, nir_fmul(nb, src[0], src[0]),
    590                                                   nir_imm_float(nb, 1.0f))))));
    591       return;
    592    case GLSLstd450Acosh:
    593       val->ssa->def = build_log(nb, nir_fadd(nb, src[0],
    594          nir_fsqrt(nb, nir_fsub(nb, nir_fmul(nb, src[0], src[0]),
    595                                     nir_imm_float(nb, 1.0f)))));
    596       return;
    597    case GLSLstd450Atanh: {
    598       nir_ssa_def *one = nir_imm_float(nb, 1.0);
    599       val->ssa->def = nir_fmul(nb, nir_imm_float(nb, 0.5f),
    600          build_log(nb, nir_fdiv(nb, nir_fadd(nb, one, src[0]),
    601                                     nir_fsub(nb, one, src[0]))));
    602       return;
    603    }
    604 
    605    case GLSLstd450Asin:
    606       val->ssa->def = build_asin(nb, src[0], 0.086566724, -0.03102955);
    607       return;
    608 
    609    case GLSLstd450Acos:
    610       val->ssa->def = nir_fsub(nb, nir_imm_float(nb, M_PI_2f),
    611                                build_asin(nb, src[0], 0.08132463, -0.02363318));
    612       return;
    613 
    614    case GLSLstd450Atan:
    615       val->ssa->def = build_atan(nb, src[0]);
    616       return;
    617 
    618    case GLSLstd450Atan2:
    619       val->ssa->def = build_atan2(nb, src[0], src[1]);
    620       return;
    621 
    622    case GLSLstd450Frexp: {
    623       nir_ssa_def *exponent;
    624       val->ssa->def = build_frexp(nb, src[0], &exponent);
    625       nir_store_deref_var(nb, vtn_nir_deref(b, w[6]), exponent, 0xf);
    626       return;
    627    }
    628 
    629    case GLSLstd450FrexpStruct: {
    630       assert(glsl_type_is_struct(val->ssa->type));
    631       val->ssa->elems[0]->def = build_frexp(nb, src[0],
    632                                             &val->ssa->elems[1]->def);
    633       return;
    634    }
    635 
    636    default:
    637       val->ssa->def =
    638          nir_build_alu(&b->nb, vtn_nir_alu_op_for_spirv_glsl_opcode(entrypoint),
    639                        src[0], src[1], src[2], NULL);
    640       return;
    641    }
    642 }
    643 
    644 static void
    645 handle_glsl450_interpolation(struct vtn_builder *b, enum GLSLstd450 opcode,
    646                              const uint32_t *w, unsigned count)
    647 {
    648    const struct glsl_type *dest_type =
    649       vtn_value(b, w[1], vtn_value_type_type)->type->type;
    650 
    651    struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    652    val->ssa = vtn_create_ssa_value(b, dest_type);
    653 
    654    nir_intrinsic_op op;
    655    switch (opcode) {
    656    case GLSLstd450InterpolateAtCentroid:
    657       op = nir_intrinsic_interp_var_at_centroid;
    658       break;
    659    case GLSLstd450InterpolateAtSample:
    660       op = nir_intrinsic_interp_var_at_sample;
    661       break;
    662    case GLSLstd450InterpolateAtOffset:
    663       op = nir_intrinsic_interp_var_at_offset;
    664       break;
    665    default:
    666       unreachable("Invalid opcode");
    667    }
    668 
    669    nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
    670 
    671    nir_deref_var *deref = vtn_nir_deref(b, w[5]);
    672    intrin->variables[0] = nir_deref_var_clone(deref, intrin);
    673 
    674    switch (opcode) {
    675    case GLSLstd450InterpolateAtCentroid:
    676       break;
    677    case GLSLstd450InterpolateAtSample:
    678    case GLSLstd450InterpolateAtOffset:
    679       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def);
    680       break;
    681    default:
    682       unreachable("Invalid opcode");
    683    }
    684 
    685    intrin->num_components = glsl_get_vector_elements(dest_type);
    686    nir_ssa_dest_init(&intrin->instr, &intrin->dest,
    687                      glsl_get_vector_elements(dest_type),
    688                      glsl_get_bit_size(dest_type), NULL);
    689    val->ssa->def = &intrin->dest.ssa;
    690 
    691    nir_builder_instr_insert(&b->nb, &intrin->instr);
    692 }
    693 
    694 bool
    695 vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
    696                                const uint32_t *w, unsigned count)
    697 {
    698    switch ((enum GLSLstd450)ext_opcode) {
    699    case GLSLstd450Determinant: {
    700       struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    701       val->ssa = rzalloc(b, struct vtn_ssa_value);
    702       val->ssa->type = vtn_value(b, w[1], vtn_value_type_type)->type->type;
    703       val->ssa->def = build_mat_det(b, vtn_ssa_value(b, w[5]));
    704       break;
    705    }
    706 
    707    case GLSLstd450MatrixInverse: {
    708       struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
    709       val->ssa = matrix_inverse(b, vtn_ssa_value(b, w[5]));
    710       break;
    711    }
    712 
    713    case GLSLstd450InterpolateAtCentroid:
    714    case GLSLstd450InterpolateAtSample:
    715    case GLSLstd450InterpolateAtOffset:
    716       handle_glsl450_interpolation(b, ext_opcode, w, count);
    717       break;
    718 
    719    default:
    720       handle_glsl450_alu(b, (enum GLSLstd450)ext_opcode, w, count);
    721    }
    722 
    723    return true;
    724 }
    725