Home | History | Annotate | Download | only in program
      1 /*
      2  * Mesa 3-D graphics library
      3  * Version:  7.3
      4  *
      5  * Copyright (C) 1999-2008  Brian Paul   All Rights Reserved.
      6  *
      7  * Permission is hereby granted, free of charge, to any person obtaining a
      8  * copy of this software and associated documentation files (the "Software"),
      9  * to deal in the Software without restriction, including without limitation
     10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     11  * and/or sell copies of the Software, and to permit persons to whom the
     12  * Software is furnished to do so, subject to the following conditions:
     13  *
     14  * The above copyright notice and this permission notice shall be included
     15  * in all copies or substantial portions of the Software.
     16  *
     17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     20  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
     21  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
     22  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     23  */
     24 
     25 /**
     26  * \file prog_execute.c
     27  * Software interpreter for vertex/fragment programs.
     28  * \author Brian Paul
     29  */
     30 
     31 /*
     32  * NOTE: we do everything in single-precision floating point; we don't
     33  * currently observe the single/half/fixed-precision qualifiers.
     34  *
     35  */
     36 
     37 
     38 #include "main/glheader.h"
     39 #include "main/colormac.h"
     40 #include "main/macros.h"
     41 #include "prog_execute.h"
     42 #include "prog_instruction.h"
     43 #include "prog_parameter.h"
     44 #include "prog_print.h"
     45 #include "prog_noise.h"
     46 
     47 
     48 /* debug predicate */
     49 #define DEBUG_PROG 0
     50 
     51 
/**
 * Set x to positive or negative infinity.
 *
 * x is expanded as the left-hand side of an assignment, so it must be a
 * modifiable lvalue.  One of three implementations is chosen at compile
 * time:
 *  - USE_IEEE or _WIN32: the bit pattern 0x7F800000 / 0xFF800000 is stored
 *    in the integer member of an fi_type and read back through its float
 *    member.
 *  - VMS: +/-__MAXFLOAT is assigned (the largest float rather than a true
 *    infinity).
 *  - otherwise: +/-HUGE_VAL converted to GLfloat is assigned.
 *
 * NOTE(review): the VMS and fallback variants expand to a bare assignment
 * (no do/while wrapper), so use these macros only as complete statements.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)                  \
   do {                                      \
         fi_type fi;                         \
         fi.i = 0x7F800000;                  \
         x = fi.f;                           \
   } while (0)
#define SET_NEG_INFINITY(x)                  \
   do {                                      \
         fi_type fi;                         \
         fi.i = 0xFF800000;                  \
         x = fi.f;                           \
   } while (0)
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif

/**
 * Overwrite the storage of x with the raw bit pattern 'bits' by viewing x
 * through the integer member of fi_type.
 *
 * NOTE(review): 'bits' and the expansion as a whole are not parenthesized;
 * pass a simple expression and use the macro only as a complete statement.
 */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits


/**
 * All-zero vector.  get_src_register_pointer() returns this when a source
 * register index is out of range.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
     80 
     81 
     82 
     83 /**
     84  * Return TRUE for +0 and other positive values, FALSE otherwise.
     85  * Used for RCC opcode.
     86  */
     87 static inline GLboolean
     88 positive(float x)
     89 {
     90    fi_type fi;
     91    fi.f = x;
     92    if (fi.i & 0x80000000)
     93       return GL_FALSE;
     94    return GL_TRUE;
     95 }
     96 
     97 
     98 
     99 /**
    100  * Return a pointer to the 4-element float vector specified by the given
    101  * source register.
    102  */
    103 static inline const GLfloat *
    104 get_src_register_pointer(const struct prog_src_register *source,
    105                          const struct gl_program_machine *machine)
    106 {
    107    const struct gl_program *prog = machine->CurProgram;
    108    GLint reg = source->Index;
    109 
    110    if (source->RelAddr) {
    111       /* add address register value to src index/offset */
    112       reg += machine->AddressReg[0][0];
    113       if (reg < 0) {
    114          return ZeroVec;
    115       }
    116    }
    117 
    118    switch (source->File) {
    119    case PROGRAM_TEMPORARY:
    120       if (reg >= MAX_PROGRAM_TEMPS)
    121          return ZeroVec;
    122       return machine->Temporaries[reg];
    123 
    124    case PROGRAM_INPUT:
    125       if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
    126          if (reg >= VERT_ATTRIB_MAX)
    127             return ZeroVec;
    128          return machine->VertAttribs[reg];
    129       }
    130       else {
    131          if (reg >= FRAG_ATTRIB_MAX)
    132             return ZeroVec;
    133          return machine->Attribs[reg][machine->CurElement];
    134       }
    135 
    136    case PROGRAM_OUTPUT:
    137       if (reg >= MAX_PROGRAM_OUTPUTS)
    138          return ZeroVec;
    139       return machine->Outputs[reg];
    140 
    141    case PROGRAM_LOCAL_PARAM:
    142       if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
    143          return ZeroVec;
    144       return machine->CurProgram->LocalParams[reg];
    145 
    146    case PROGRAM_ENV_PARAM:
    147       if (reg >= MAX_PROGRAM_ENV_PARAMS)
    148          return ZeroVec;
    149       return machine->EnvParams[reg];
    150 
    151    case PROGRAM_STATE_VAR:
    152       /* Fallthrough */
    153    case PROGRAM_CONSTANT:
    154       /* Fallthrough */
    155    case PROGRAM_UNIFORM:
    156       /* Fallthrough */
    157    case PROGRAM_NAMED_PARAM:
    158       if (reg >= (GLint) prog->Parameters->NumParameters)
    159          return ZeroVec;
    160       return (GLfloat *) prog->Parameters->ParameterValues[reg];
    161 
    162    case PROGRAM_SYSTEM_VALUE:
    163       assert(reg < Elements(machine->SystemValues));
    164       return machine->SystemValues[reg];
    165 
    166    default:
    167       _mesa_problem(NULL,
    168          "Invalid src register file %d in get_src_register_pointer()",
    169          source->File);
    170       return NULL;
    171    }
    172 }
    173 
    174 
    175 /**
    176  * Return a pointer to the 4-element float vector specified by the given
    177  * destination register.
    178  */
    179 static inline GLfloat *
    180 get_dst_register_pointer(const struct prog_dst_register *dest,
    181                          struct gl_program_machine *machine)
    182 {
    183    static GLfloat dummyReg[4];
    184    GLint reg = dest->Index;
    185 
    186    if (dest->RelAddr) {
    187       /* add address register value to src index/offset */
    188       reg += machine->AddressReg[0][0];
    189       if (reg < 0) {
    190          return dummyReg;
    191       }
    192    }
    193 
    194    switch (dest->File) {
    195    case PROGRAM_TEMPORARY:
    196       if (reg >= MAX_PROGRAM_TEMPS)
    197          return dummyReg;
    198       return machine->Temporaries[reg];
    199 
    200    case PROGRAM_OUTPUT:
    201       if (reg >= MAX_PROGRAM_OUTPUTS)
    202          return dummyReg;
    203       return machine->Outputs[reg];
    204 
    205    case PROGRAM_WRITE_ONLY:
    206       return dummyReg;
    207 
    208    default:
    209       _mesa_problem(NULL,
    210          "Invalid dest register file %d in get_dst_register_pointer()",
    211          dest->File);
    212       return NULL;
    213    }
    214 }
    215 
    216 
    217 
    218 /**
    219  * Fetch a 4-element float vector from the given source register.
    220  * Apply swizzling and negating as needed.
    221  */
    222 static void
    223 fetch_vector4(const struct prog_src_register *source,
    224               const struct gl_program_machine *machine, GLfloat result[4])
    225 {
    226    const GLfloat *src = get_src_register_pointer(source, machine);
    227    ASSERT(src);
    228 
    229    if (source->Swizzle == SWIZZLE_NOOP) {
    230       /* no swizzling */
    231       COPY_4V(result, src);
    232    }
    233    else {
    234       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
    235       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
    236       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
    237       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
    238       result[0] = src[GET_SWZ(source->Swizzle, 0)];
    239       result[1] = src[GET_SWZ(source->Swizzle, 1)];
    240       result[2] = src[GET_SWZ(source->Swizzle, 2)];
    241       result[3] = src[GET_SWZ(source->Swizzle, 3)];
    242    }
    243 
    244    if (source->Abs) {
    245       result[0] = FABSF(result[0]);
    246       result[1] = FABSF(result[1]);
    247       result[2] = FABSF(result[2]);
    248       result[3] = FABSF(result[3]);
    249    }
    250    if (source->Negate) {
    251       ASSERT(source->Negate == NEGATE_XYZW);
    252       result[0] = -result[0];
    253       result[1] = -result[1];
    254       result[2] = -result[2];
    255       result[3] = -result[3];
    256    }
    257 
    258 #ifdef NAN_CHECK
    259    assert(!IS_INF_OR_NAN(result[0]));
    260    assert(!IS_INF_OR_NAN(result[0]));
    261    assert(!IS_INF_OR_NAN(result[0]));
    262    assert(!IS_INF_OR_NAN(result[0]));
    263 #endif
    264 }
    265 
    266 
    267 /**
    268  * Fetch a 4-element uint vector from the given source register.
    269  * Apply swizzling but not negation/abs.
    270  */
    271 static void
    272 fetch_vector4ui(const struct prog_src_register *source,
    273                 const struct gl_program_machine *machine, GLuint result[4])
    274 {
    275    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
    276    ASSERT(src);
    277 
    278    if (source->Swizzle == SWIZZLE_NOOP) {
    279       /* no swizzling */
    280       COPY_4V(result, src);
    281    }
    282    else {
    283       ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
    284       ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
    285       ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
    286       ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
    287       result[0] = src[GET_SWZ(source->Swizzle, 0)];
    288       result[1] = src[GET_SWZ(source->Swizzle, 1)];
    289       result[2] = src[GET_SWZ(source->Swizzle, 2)];
    290       result[3] = src[GET_SWZ(source->Swizzle, 3)];
    291    }
    292 
    293    /* Note: no Negate or Abs here */
    294 }
    295 
    296 
    297 
    298 /**
    299  * Fetch the derivative with respect to X or Y for the given register.
    300  * XXX this currently only works for fragment program input attribs.
    301  */
    302 static void
    303 fetch_vector4_deriv(struct gl_context * ctx,
    304                     const struct prog_src_register *source,
    305                     const struct gl_program_machine *machine,
    306                     char xOrY, GLfloat result[4])
    307 {
    308    if (source->File == PROGRAM_INPUT &&
    309        source->Index < (GLint) machine->NumDeriv) {
    310       const GLint col = machine->CurElement;
    311       const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
    312       const GLfloat invQ = 1.0f / w;
    313       GLfloat deriv[4];
    314 
    315       if (xOrY == 'X') {
    316          deriv[0] = machine->DerivX[source->Index][0] * invQ;
    317          deriv[1] = machine->DerivX[source->Index][1] * invQ;
    318          deriv[2] = machine->DerivX[source->Index][2] * invQ;
    319          deriv[3] = machine->DerivX[source->Index][3] * invQ;
    320       }
    321       else {
    322          deriv[0] = machine->DerivY[source->Index][0] * invQ;
    323          deriv[1] = machine->DerivY[source->Index][1] * invQ;
    324          deriv[2] = machine->DerivY[source->Index][2] * invQ;
    325          deriv[3] = machine->DerivY[source->Index][3] * invQ;
    326       }
    327 
    328       result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
    329       result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
    330       result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
    331       result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
    332 
    333       if (source->Abs) {
    334          result[0] = FABSF(result[0]);
    335          result[1] = FABSF(result[1]);
    336          result[2] = FABSF(result[2]);
    337          result[3] = FABSF(result[3]);
    338       }
    339       if (source->Negate) {
    340          ASSERT(source->Negate == NEGATE_XYZW);
    341          result[0] = -result[0];
    342          result[1] = -result[1];
    343          result[2] = -result[2];
    344          result[3] = -result[3];
    345       }
    346    }
    347    else {
    348       ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
    349    }
    350 }
    351 
    352 
    353 /**
    354  * As above, but only return result[0] element.
    355  */
    356 static void
    357 fetch_vector1(const struct prog_src_register *source,
    358               const struct gl_program_machine *machine, GLfloat result[4])
    359 {
    360    const GLfloat *src = get_src_register_pointer(source, machine);
    361    ASSERT(src);
    362 
    363    result[0] = src[GET_SWZ(source->Swizzle, 0)];
    364 
    365    if (source->Abs) {
    366       result[0] = FABSF(result[0]);
    367    }
    368    if (source->Negate) {
    369       result[0] = -result[0];
    370    }
    371 }
    372 
    373 
    374 static GLuint
    375 fetch_vector1ui(const struct prog_src_register *source,
    376                 const struct gl_program_machine *machine)
    377 {
    378    const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
    379    return src[GET_SWZ(source->Swizzle, 0)];
    380 }
    381 
    382 
    383 /**
    384  * Fetch texel from texture.  Use partial derivatives when possible.
    385  */
    386 static inline void
    387 fetch_texel(struct gl_context *ctx,
    388             const struct gl_program_machine *machine,
    389             const struct prog_instruction *inst,
    390             const GLfloat texcoord[4], GLfloat lodBias,
    391             GLfloat color[4])
    392 {
    393    const GLuint unit = machine->Samplers[inst->TexSrcUnit];
    394 
    395    /* Note: we only have the right derivatives for fragment input attribs.
    396     */
    397    if (machine->NumDeriv > 0 &&
    398        inst->SrcReg[0].File == PROGRAM_INPUT &&
    399        inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
    400       /* simple texture fetch for which we should have derivatives */
    401       GLuint attr = inst->SrcReg[0].Index;
    402       machine->FetchTexelDeriv(ctx, texcoord,
    403                                machine->DerivX[attr],
    404                                machine->DerivY[attr],
    405                                lodBias, unit, color);
    406    }
    407    else {
    408       machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
    409    }
    410 }
    411 
    412 
    413 /**
    414  * Test value against zero and return GT, LT, EQ or UN if NaN.
    415  */
    416 static inline GLuint
    417 generate_cc(float value)
    418 {
    419    if (value != value)
    420       return COND_UN;           /* NaN */
    421    if (value > 0.0F)
    422       return COND_GT;
    423    if (value < 0.0F)
    424       return COND_LT;
    425    return COND_EQ;
    426 }
    427 
    428 
    429 /**
    430  * Test if the ccMaskRule is satisfied by the given condition code.
    431  * Used to mask destination writes according to the current condition code.
    432  */
    433 static inline GLboolean
    434 test_cc(GLuint condCode, GLuint ccMaskRule)
    435 {
    436    switch (ccMaskRule) {
    437    case COND_EQ: return (condCode == COND_EQ);
    438    case COND_NE: return (condCode != COND_EQ);
    439    case COND_LT: return (condCode == COND_LT);
    440    case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
    441    case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
    442    case COND_GT: return (condCode == COND_GT);
    443    case COND_TR: return GL_TRUE;
    444    case COND_FL: return GL_FALSE;
    445    default:      return GL_TRUE;
    446    }
    447 }
    448 
    449 
    450 /**
    451  * Evaluate the 4 condition codes against a predicate and return GL_TRUE
    452  * or GL_FALSE to indicate result.
    453  */
    454 static inline GLboolean
    455 eval_condition(const struct gl_program_machine *machine,
    456                const struct prog_instruction *inst)
    457 {
    458    const GLuint swizzle = inst->DstReg.CondSwizzle;
    459    const GLuint condMask = inst->DstReg.CondMask;
    460    if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
    461        test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
    462        test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
    463        test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
    464       return GL_TRUE;
    465    }
    466    else {
    467       return GL_FALSE;
    468    }
    469 }
    470 
    471 
    472 
    473 /**
    474  * Store 4 floats into a register.  Observe the instructions saturate and
    475  * set-condition-code flags.
    476  */
    477 static void
    478 store_vector4(const struct prog_instruction *inst,
    479               struct gl_program_machine *machine, const GLfloat value[4])
    480 {
    481    const struct prog_dst_register *dstReg = &(inst->DstReg);
    482    const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
    483    GLuint writeMask = dstReg->WriteMask;
    484    GLfloat clampedValue[4];
    485    GLfloat *dst = get_dst_register_pointer(dstReg, machine);
    486 
    487 #if 0
    488    if (value[0] > 1.0e10 ||
    489        IS_INF_OR_NAN(value[0]) ||
    490        IS_INF_OR_NAN(value[1]) ||
    491        IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
    492       printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
    493 #endif
    494 
    495    if (clamp) {
    496       clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
    497       clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
    498       clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
    499       clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
    500       value = clampedValue;
    501    }
    502 
    503    if (dstReg->CondMask != COND_TR) {
    504       /* condition codes may turn off some writes */
    505       if (writeMask & WRITEMASK_X) {
    506          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
    507                       dstReg->CondMask))
    508             writeMask &= ~WRITEMASK_X;
    509       }
    510       if (writeMask & WRITEMASK_Y) {
    511          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
    512                       dstReg->CondMask))
    513             writeMask &= ~WRITEMASK_Y;
    514       }
    515       if (writeMask & WRITEMASK_Z) {
    516          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
    517                       dstReg->CondMask))
    518             writeMask &= ~WRITEMASK_Z;
    519       }
    520       if (writeMask & WRITEMASK_W) {
    521          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
    522                       dstReg->CondMask))
    523             writeMask &= ~WRITEMASK_W;
    524       }
    525    }
    526 
    527 #ifdef NAN_CHECK
    528    assert(!IS_INF_OR_NAN(value[0]));
    529    assert(!IS_INF_OR_NAN(value[0]));
    530    assert(!IS_INF_OR_NAN(value[0]));
    531    assert(!IS_INF_OR_NAN(value[0]));
    532 #endif
    533 
    534    if (writeMask & WRITEMASK_X)
    535       dst[0] = value[0];
    536    if (writeMask & WRITEMASK_Y)
    537       dst[1] = value[1];
    538    if (writeMask & WRITEMASK_Z)
    539       dst[2] = value[2];
    540    if (writeMask & WRITEMASK_W)
    541       dst[3] = value[3];
    542 
    543    if (inst->CondUpdate) {
    544       if (writeMask & WRITEMASK_X)
    545          machine->CondCodes[0] = generate_cc(value[0]);
    546       if (writeMask & WRITEMASK_Y)
    547          machine->CondCodes[1] = generate_cc(value[1]);
    548       if (writeMask & WRITEMASK_Z)
    549          machine->CondCodes[2] = generate_cc(value[2]);
    550       if (writeMask & WRITEMASK_W)
    551          machine->CondCodes[3] = generate_cc(value[3]);
    552 #if DEBUG_PROG
    553       printf("CondCodes=(%s,%s,%s,%s) for:\n",
    554              _mesa_condcode_string(machine->CondCodes[0]),
    555              _mesa_condcode_string(machine->CondCodes[1]),
    556              _mesa_condcode_string(machine->CondCodes[2]),
    557              _mesa_condcode_string(machine->CondCodes[3]));
    558 #endif
    559    }
    560 }
    561 
    562 
    563 /**
    564  * Store 4 uints into a register.  Observe the set-condition-code flags.
    565  */
    566 static void
    567 store_vector4ui(const struct prog_instruction *inst,
    568                 struct gl_program_machine *machine, const GLuint value[4])
    569 {
    570    const struct prog_dst_register *dstReg = &(inst->DstReg);
    571    GLuint writeMask = dstReg->WriteMask;
    572    GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
    573 
    574    if (dstReg->CondMask != COND_TR) {
    575       /* condition codes may turn off some writes */
    576       if (writeMask & WRITEMASK_X) {
    577          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
    578                       dstReg->CondMask))
    579             writeMask &= ~WRITEMASK_X;
    580       }
    581       if (writeMask & WRITEMASK_Y) {
    582          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
    583                       dstReg->CondMask))
    584             writeMask &= ~WRITEMASK_Y;
    585       }
    586       if (writeMask & WRITEMASK_Z) {
    587          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
    588                       dstReg->CondMask))
    589             writeMask &= ~WRITEMASK_Z;
    590       }
    591       if (writeMask & WRITEMASK_W) {
    592          if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
    593                       dstReg->CondMask))
    594             writeMask &= ~WRITEMASK_W;
    595       }
    596    }
    597 
    598    if (writeMask & WRITEMASK_X)
    599       dst[0] = value[0];
    600    if (writeMask & WRITEMASK_Y)
    601       dst[1] = value[1];
    602    if (writeMask & WRITEMASK_Z)
    603       dst[2] = value[2];
    604    if (writeMask & WRITEMASK_W)
    605       dst[3] = value[3];
    606 
    607    if (inst->CondUpdate) {
    608       if (writeMask & WRITEMASK_X)
    609          machine->CondCodes[0] = generate_cc((float)value[0]);
    610       if (writeMask & WRITEMASK_Y)
    611          machine->CondCodes[1] = generate_cc((float)value[1]);
    612       if (writeMask & WRITEMASK_Z)
    613          machine->CondCodes[2] = generate_cc((float)value[2]);
    614       if (writeMask & WRITEMASK_W)
    615          machine->CondCodes[3] = generate_cc((float)value[3]);
    616 #if DEBUG_PROG
    617       printf("CondCodes=(%s,%s,%s,%s) for:\n",
    618              _mesa_condcode_string(machine->CondCodes[0]),
    619              _mesa_condcode_string(machine->CondCodes[1]),
    620              _mesa_condcode_string(machine->CondCodes[2]),
    621              _mesa_condcode_string(machine->CondCodes[3]));
    622 #endif
    623    }
    624 }
    625 
    626 
    627 
    628 /**
    629  * Execute the given vertex/fragment program.
    630  *
    631  * \param ctx  rendering context
    632  * \param program  the program to execute
    633  * \param machine  machine state (must be initialized)
    634  * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
    635  */
    636 GLboolean
    637 _mesa_execute_program(struct gl_context * ctx,
    638                       const struct gl_program *program,
    639                       struct gl_program_machine *machine)
    640 {
    641    const GLuint numInst = program->NumInstructions;
    642    const GLuint maxExec = 65536;
    643    GLuint pc, numExec = 0;
    644 
    645    machine->CurProgram = program;
    646 
    647    if (DEBUG_PROG) {
    648       printf("execute program %u --------------------\n", program->Id);
    649    }
    650 
    651    if (program->Target == GL_VERTEX_PROGRAM_ARB) {
    652       machine->EnvParams = ctx->VertexProgram.Parameters;
    653    }
    654    else {
    655       machine->EnvParams = ctx->FragmentProgram.Parameters;
    656    }
    657 
    658    for (pc = 0; pc < numInst; pc++) {
    659       const struct prog_instruction *inst = program->Instructions + pc;
    660 
    661       if (DEBUG_PROG) {
    662          _mesa_print_instruction(inst);
    663       }
    664 
    665       switch (inst->Opcode) {
    666       case OPCODE_ABS:
    667          {
    668             GLfloat a[4], result[4];
    669             fetch_vector4(&inst->SrcReg[0], machine, a);
    670             result[0] = FABSF(a[0]);
    671             result[1] = FABSF(a[1]);
    672             result[2] = FABSF(a[2]);
    673             result[3] = FABSF(a[3]);
    674             store_vector4(inst, machine, result);
    675          }
    676          break;
    677       case OPCODE_ADD:
    678          {
    679             GLfloat a[4], b[4], result[4];
    680             fetch_vector4(&inst->SrcReg[0], machine, a);
    681             fetch_vector4(&inst->SrcReg[1], machine, b);
    682             result[0] = a[0] + b[0];
    683             result[1] = a[1] + b[1];
    684             result[2] = a[2] + b[2];
    685             result[3] = a[3] + b[3];
    686             store_vector4(inst, machine, result);
    687             if (DEBUG_PROG) {
    688                printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
    689                       result[0], result[1], result[2], result[3],
    690                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
    691             }
    692          }
    693          break;
    694       case OPCODE_AND:     /* bitwise AND */
    695          {
    696             GLuint a[4], b[4], result[4];
    697             fetch_vector4ui(&inst->SrcReg[0], machine, a);
    698             fetch_vector4ui(&inst->SrcReg[1], machine, b);
    699             result[0] = a[0] & b[0];
    700             result[1] = a[1] & b[1];
    701             result[2] = a[2] & b[2];
    702             result[3] = a[3] & b[3];
    703             store_vector4ui(inst, machine, result);
    704          }
    705          break;
    706       case OPCODE_ARL:
    707          {
    708             GLfloat t[4];
    709             fetch_vector4(&inst->SrcReg[0], machine, t);
    710             machine->AddressReg[0][0] = IFLOOR(t[0]);
    711             if (DEBUG_PROG) {
    712                printf("ARL %d\n", machine->AddressReg[0][0]);
    713             }
    714          }
    715          break;
    716       case OPCODE_BGNLOOP:
    717          /* no-op */
    718          ASSERT(program->Instructions[inst->BranchTarget].Opcode
    719                 == OPCODE_ENDLOOP);
    720          break;
    721       case OPCODE_ENDLOOP:
    722          /* subtract 1 here since pc is incremented by for(pc) loop */
    723          ASSERT(program->Instructions[inst->BranchTarget].Opcode
    724                 == OPCODE_BGNLOOP);
    725          pc = inst->BranchTarget - 1;   /* go to matching BNGLOOP */
    726          break;
    727       case OPCODE_BGNSUB:      /* begin subroutine */
    728          break;
    729       case OPCODE_ENDSUB:      /* end subroutine */
    730          break;
    731       case OPCODE_BRA:         /* branch (conditional) */
    732          if (eval_condition(machine, inst)) {
    733             /* take branch */
    734             /* Subtract 1 here since we'll do pc++ below */
    735             pc = inst->BranchTarget - 1;
    736          }
    737          break;
    738       case OPCODE_BRK:         /* break out of loop (conditional) */
    739          ASSERT(program->Instructions[inst->BranchTarget].Opcode
    740                 == OPCODE_ENDLOOP);
    741          if (eval_condition(machine, inst)) {
    742             /* break out of loop */
    743             /* pc++ at end of for-loop will put us after the ENDLOOP inst */
    744             pc = inst->BranchTarget;
    745          }
    746          break;
    747       case OPCODE_CONT:        /* continue loop (conditional) */
    748          ASSERT(program->Instructions[inst->BranchTarget].Opcode
    749                 == OPCODE_ENDLOOP);
    750          if (eval_condition(machine, inst)) {
    751             /* continue at ENDLOOP */
    752             /* Subtract 1 here since we'll do pc++ at end of for-loop */
    753             pc = inst->BranchTarget - 1;
    754          }
    755          break;
    756       case OPCODE_CAL:         /* Call subroutine (conditional) */
    757          if (eval_condition(machine, inst)) {
    758             /* call the subroutine */
    759             if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
    760                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
    761             }
    762             machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
    763             /* Subtract 1 here since we'll do pc++ at end of for-loop */
    764             pc = inst->BranchTarget - 1;
    765          }
    766          break;
    767       case OPCODE_CMP:
    768          {
    769             GLfloat a[4], b[4], c[4], result[4];
    770             fetch_vector4(&inst->SrcReg[0], machine, a);
    771             fetch_vector4(&inst->SrcReg[1], machine, b);
    772             fetch_vector4(&inst->SrcReg[2], machine, c);
    773             result[0] = a[0] < 0.0F ? b[0] : c[0];
    774             result[1] = a[1] < 0.0F ? b[1] : c[1];
    775             result[2] = a[2] < 0.0F ? b[2] : c[2];
    776             result[3] = a[3] < 0.0F ? b[3] : c[3];
    777             store_vector4(inst, machine, result);
    778             if (DEBUG_PROG) {
    779                printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
    780                       result[0], result[1], result[2], result[3],
    781                       a[0], a[1], a[2], a[3],
    782                       b[0], b[1], b[2], b[3],
    783                       c[0], c[1], c[2], c[3]);
    784             }
    785          }
    786          break;
    787       case OPCODE_COS:
    788          {
    789             GLfloat a[4], result[4];
    790             fetch_vector1(&inst->SrcReg[0], machine, a);
    791             result[0] = result[1] = result[2] = result[3]
    792                = (GLfloat) cos(a[0]);
    793             store_vector4(inst, machine, result);
    794          }
    795          break;
    796       case OPCODE_DDX:         /* Partial derivative with respect to X */
    797          {
    798             GLfloat result[4];
    799             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
    800                                 'X', result);
    801             store_vector4(inst, machine, result);
    802          }
    803          break;
    804       case OPCODE_DDY:         /* Partial derivative with respect to Y */
    805          {
    806             GLfloat result[4];
    807             fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
    808                                 'Y', result);
    809             store_vector4(inst, machine, result);
    810          }
    811          break;
    812       case OPCODE_DP2:
    813          {
    814             GLfloat a[4], b[4], result[4];
    815             fetch_vector4(&inst->SrcReg[0], machine, a);
    816             fetch_vector4(&inst->SrcReg[1], machine, b);
    817             result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
    818             store_vector4(inst, machine, result);
    819             if (DEBUG_PROG) {
    820                printf("DP2 %g = (%g %g) . (%g %g)\n",
    821                       result[0], a[0], a[1], b[0], b[1]);
    822             }
    823          }
    824          break;
    825       case OPCODE_DP2A:        /* 2-component dot product plus scalar addend */
    826          {
    827             GLfloat a[4], b[4], c, result[4];
    828             fetch_vector4(&inst->SrcReg[0], machine, a);
    829             fetch_vector4(&inst->SrcReg[1], machine, b);
    830             fetch_vector1(&inst->SrcReg[2], machine, &c);  /* addend is the third operand */
    831             result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
    832             store_vector4(inst, machine, result);
    833             if (DEBUG_PROG) {
    834                printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
    835                       result[0], a[0], a[1], b[0], b[1], c);
    836             }
    837          }
    838          break;
    839       case OPCODE_DP3:
    840          {
    841             GLfloat a[4], b[4], result[4];
    842             fetch_vector4(&inst->SrcReg[0], machine, a);
    843             fetch_vector4(&inst->SrcReg[1], machine, b);
    844             result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
    845             store_vector4(inst, machine, result);
    846             if (DEBUG_PROG) {
    847                printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
    848                       result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
    849             }
    850          }
    851          break;
    852       case OPCODE_DP4:
    853          {
    854             GLfloat a[4], b[4], result[4];
    855             fetch_vector4(&inst->SrcReg[0], machine, a);
    856             fetch_vector4(&inst->SrcReg[1], machine, b);
    857             result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
    858             store_vector4(inst, machine, result);
    859             if (DEBUG_PROG) {
    860                printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
    861                       result[0], a[0], a[1], a[2], a[3],
    862                       b[0], b[1], b[2], b[3]);
    863             }
    864          }
    865          break;
    866       case OPCODE_DPH:
    867          {
    868             GLfloat a[4], b[4], result[4];
    869             fetch_vector4(&inst->SrcReg[0], machine, a);
    870             fetch_vector4(&inst->SrcReg[1], machine, b);
    871             result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
    872             store_vector4(inst, machine, result);
    873          }
    874          break;
    875       case OPCODE_DST:         /* Distance vector */
    876          {
    877             GLfloat a[4], b[4], result[4];
    878             fetch_vector4(&inst->SrcReg[0], machine, a);
    879             fetch_vector4(&inst->SrcReg[1], machine, b);
    880             result[0] = 1.0F;
    881             result[1] = a[1] * b[1];
    882             result[2] = a[2];
    883             result[3] = b[3];
    884             store_vector4(inst, machine, result);
    885          }
    886          break;
    887       case OPCODE_EXP:
    888          {
    889             GLfloat t[4], q[4], floor_t0;
    890             fetch_vector1(&inst->SrcReg[0], machine, t);
    891             floor_t0 = FLOORF(t[0]);
    892             if (floor_t0 > FLT_MAX_EXP) {
    893                SET_POS_INFINITY(q[0]);
    894                SET_POS_INFINITY(q[2]);
    895             }
    896             else if (floor_t0 < FLT_MIN_EXP) {
    897                q[0] = 0.0F;
    898                q[2] = 0.0F;
    899             }
    900             else {
    901                q[0] = LDEXPF(1.0, (int) floor_t0);
    902                /* Note: GL_NV_vertex_program expects
    903                 * result.z = result.x * APPX(result.y)
    904                 * We do what the ARB extension says.
    905                 */
    906                q[2] = (GLfloat) pow(2.0, t[0]);
    907             }
    908             q[1] = t[0] - floor_t0;
    909             q[3] = 1.0F;
    910             store_vector4( inst, machine, q );
    911          }
    912          break;
    913       case OPCODE_EX2:         /* Exponential base 2 */
    914          {
    915             GLfloat a[4], result[4], val;
    916             fetch_vector1(&inst->SrcReg[0], machine, a);
    917             val = (GLfloat) pow(2.0, a[0]);
    918             /*
    919             if (IS_INF_OR_NAN(val))
    920                val = 1.0e10;
    921             */
    922             result[0] = result[1] = result[2] = result[3] = val;
    923             store_vector4(inst, machine, result);
    924          }
    925          break;
    926       case OPCODE_FLR:
    927          {
    928             GLfloat a[4], result[4];
    929             fetch_vector4(&inst->SrcReg[0], machine, a);
    930             result[0] = FLOORF(a[0]);
    931             result[1] = FLOORF(a[1]);
    932             result[2] = FLOORF(a[2]);
    933             result[3] = FLOORF(a[3]);
    934             store_vector4(inst, machine, result);
    935          }
    936          break;
    937       case OPCODE_FRC:
    938          {
    939             GLfloat a[4], result[4];
    940             fetch_vector4(&inst->SrcReg[0], machine, a);
    941             result[0] = a[0] - FLOORF(a[0]);
    942             result[1] = a[1] - FLOORF(a[1]);
    943             result[2] = a[2] - FLOORF(a[2]);
    944             result[3] = a[3] - FLOORF(a[3]);
    945             store_vector4(inst, machine, result);
    946          }
    947          break;
    948       case OPCODE_IF:
    949          {
    950             GLboolean cond;
    951             ASSERT(program->Instructions[inst->BranchTarget].Opcode
    952                    == OPCODE_ELSE ||
    953                    program->Instructions[inst->BranchTarget].Opcode
    954                    == OPCODE_ENDIF);
    955             /* eval condition */
    956             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
    957                GLfloat a[4];
    958                fetch_vector1(&inst->SrcReg[0], machine, a);
    959                cond = (a[0] != 0.0);
    960             }
    961             else {
    962                cond = eval_condition(machine, inst);
    963             }
    964             if (DEBUG_PROG) {
    965                printf("IF: %d\n", cond);
    966             }
    967             /* do if/else */
    968             if (cond) {
    969                /* do if-clause (just continue execution) */
    970             }
    971             else {
    972                /* go to the instruction after ELSE or ENDIF */
    973                assert(inst->BranchTarget >= 0);
    974                pc = inst->BranchTarget;
    975             }
    976          }
    977          break;
    978       case OPCODE_ELSE:
    979          /* goto ENDIF */
    980          ASSERT(program->Instructions[inst->BranchTarget].Opcode
    981                 == OPCODE_ENDIF);
    982          assert(inst->BranchTarget >= 0);
    983          pc = inst->BranchTarget;
    984          break;
    985       case OPCODE_ENDIF:
    986          /* nothing */
    987          break;
    988       case OPCODE_KIL_NV:      /* NV_f_p only (conditional) */
    989          if (eval_condition(machine, inst)) {
    990             return GL_FALSE;
    991          }
    992          break;
    993       case OPCODE_KIL:         /* ARB_f_p only */
    994          {
    995             GLfloat a[4];
    996             fetch_vector4(&inst->SrcReg[0], machine, a);
    997             if (DEBUG_PROG) {
    998                printf("KIL if (%g %g %g %g) < 0.0\n",
    999                       a[0], a[1], a[2], a[3]);
   1000             }
   1001 
   1002             if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
   1003                return GL_FALSE;  /* any strictly negative component stops execution */
   1004             }
   1005          }
   1006          break;
   1007       case OPCODE_LG2:         /* log base 2 */
   1008          {
   1009             GLfloat a[4], result[4], val;
   1010             fetch_vector1(&inst->SrcReg[0], machine, a);
   1011 	    /* The fast LOG2 macro doesn't meet the precision requirements.
   1012 	     */
   1013             if (a[0] == 0.0F) {
   1014                val = -FLT_MAX;
   1015             }
   1016             else {
   1017                val = (float)(log(a[0]) * 1.442695F);
   1018             }
   1019             result[0] = result[1] = result[2] = result[3] = val;
   1020             store_vector4(inst, machine, result);
   1021          }
   1022          break;
   1023       case OPCODE_LIT:
   1024          {
   1025             const GLfloat epsilon = 1.0F / 256.0F;      /* from NV VP spec */
   1026             GLfloat a[4], result[4];
   1027             fetch_vector4(&inst->SrcReg[0], machine, a);
   1028             a[0] = MAX2(a[0], 0.0F);
   1029             a[1] = MAX2(a[1], 0.0F);
   1030             /* XXX ARB version clamps a[3], NV version doesn't */
   1031             a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
   1032             result[0] = 1.0F;
   1033             result[1] = a[0];
   1034             /* XXX we could probably just use pow() here */
   1035             if (a[0] > 0.0F) {
   1036                if (a[1] == 0.0 && a[3] == 0.0)
   1037                   result[2] = 1.0F;
   1038                else
   1039                   result[2] = (GLfloat) pow(a[1], a[3]);
   1040             }
   1041             else {
   1042                result[2] = 0.0F;
   1043             }
   1044             result[3] = 1.0F;
   1045             store_vector4(inst, machine, result);
   1046             if (DEBUG_PROG) {
   1047                printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
   1048                       result[0], result[1], result[2], result[3],
   1049                       a[0], a[1], a[2], a[3]);
   1050             }
   1051          }
   1052          break;
   1053       case OPCODE_LOG:         /* x = exponent, y = mantissa, z = log2, w = 1 */
   1054          {
   1055             GLfloat t[4], q[4], abs_t0;
   1056             fetch_vector1(&inst->SrcReg[0], machine, t);
   1057             abs_t0 = FABSF(t[0]);  /* LOG operates on the magnitude of the operand */
   1058             if (abs_t0 != 0.0F) {
   1059                /* Since we really can't handle infinite values on VMS
   1060                 * like other OSes we'll use __MAXFLOAT to represent
   1061                 * infinity.  This may need some tweaking.
   1062                 */
   1063 #ifdef VMS
   1064                if (abs_t0 == __MAXFLOAT)
   1065 #else
   1066                if (IS_INF_OR_NAN(abs_t0))
   1067 #endif
   1068                {
   1069                   SET_POS_INFINITY(q[0]);
   1070                   q[1] = 1.0F;
   1071                   SET_POS_INFINITY(q[2]);
   1072                }
   1073                else {
   1074                   int exponent;
   1075                   GLfloat mantissa = FREXPF(abs_t0, &exponent);  /* magnitude, so mantissa is in [.5, 1) */
   1076                   q[0] = (GLfloat) (exponent - 1);
   1077                   q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
   1078 
   1079                   /* The fast LOG2 macro doesn't meet the precision
   1080                    * requirements.
   1081                    */
   1082                   q[2] = (float)(log(abs_t0) * 1.442695F);  /* ln(x) * log2(e); no NaN for negative input */
   1083                }
   1084             }
   1085             else {
   1086                SET_NEG_INFINITY(q[0]);
   1087                q[1] = 1.0F;
   1088                SET_NEG_INFINITY(q[2]);
   1089             }
   1090             q[3] = 1.0;
   1091             store_vector4(inst, machine, q);
   1092          }
   1093          break;
   1094       case OPCODE_LRP:
   1095          {
   1096             GLfloat a[4], b[4], c[4], result[4];
   1097             fetch_vector4(&inst->SrcReg[0], machine, a);
   1098             fetch_vector4(&inst->SrcReg[1], machine, b);
   1099             fetch_vector4(&inst->SrcReg[2], machine, c);
   1100             result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
   1101             result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
   1102             result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
   1103             result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
   1104             store_vector4(inst, machine, result);
   1105             if (DEBUG_PROG) {
   1106                printf("LRP (%g %g %g %g) = (%g %g %g %g), "
   1107                       "(%g %g %g %g), (%g %g %g %g)\n",
   1108                       result[0], result[1], result[2], result[3],
   1109                       a[0], a[1], a[2], a[3],
   1110                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
   1111             }
   1112          }
   1113          break;
   1114       case OPCODE_MAD:
   1115          {
   1116             GLfloat a[4], b[4], c[4], result[4];
   1117             fetch_vector4(&inst->SrcReg[0], machine, a);
   1118             fetch_vector4(&inst->SrcReg[1], machine, b);
   1119             fetch_vector4(&inst->SrcReg[2], machine, c);
   1120             result[0] = a[0] * b[0] + c[0];
   1121             result[1] = a[1] * b[1] + c[1];
   1122             result[2] = a[2] * b[2] + c[2];
   1123             result[3] = a[3] * b[3] + c[3];
   1124             store_vector4(inst, machine, result);
   1125             if (DEBUG_PROG) {
   1126                printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
   1127                       "(%g %g %g %g) + (%g %g %g %g)\n",
   1128                       result[0], result[1], result[2], result[3],
   1129                       a[0], a[1], a[2], a[3],
   1130                       b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
   1131             }
   1132          }
   1133          break;
   1134       case OPCODE_MAX:
   1135          {
   1136             GLfloat a[4], b[4], result[4];
   1137             fetch_vector4(&inst->SrcReg[0], machine, a);
   1138             fetch_vector4(&inst->SrcReg[1], machine, b);
   1139             result[0] = MAX2(a[0], b[0]);
   1140             result[1] = MAX2(a[1], b[1]);
   1141             result[2] = MAX2(a[2], b[2]);
   1142             result[3] = MAX2(a[3], b[3]);
   1143             store_vector4(inst, machine, result);
   1144             if (DEBUG_PROG) {
   1145                printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
   1146                       result[0], result[1], result[2], result[3],
   1147                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
   1148             }
   1149          }
   1150          break;
   1151       case OPCODE_MIN:
   1152          {
   1153             GLfloat a[4], b[4], result[4];
   1154             fetch_vector4(&inst->SrcReg[0], machine, a);
   1155             fetch_vector4(&inst->SrcReg[1], machine, b);
   1156             result[0] = MIN2(a[0], b[0]);
   1157             result[1] = MIN2(a[1], b[1]);
   1158             result[2] = MIN2(a[2], b[2]);
   1159             result[3] = MIN2(a[3], b[3]);
   1160             store_vector4(inst, machine, result);
   1161          }
   1162          break;
   1163       case OPCODE_MOV:
   1164          {
   1165             GLfloat result[4];
   1166             fetch_vector4(&inst->SrcReg[0], machine, result);
   1167             store_vector4(inst, machine, result);
   1168             if (DEBUG_PROG) {
   1169                printf("MOV (%g %g %g %g)\n",
   1170                       result[0], result[1], result[2], result[3]);
   1171             }
   1172          }
   1173          break;
   1174       case OPCODE_MUL:
   1175          {
   1176             GLfloat a[4], b[4], result[4];
   1177             fetch_vector4(&inst->SrcReg[0], machine, a);
   1178             fetch_vector4(&inst->SrcReg[1], machine, b);
   1179             result[0] = a[0] * b[0];
   1180             result[1] = a[1] * b[1];
   1181             result[2] = a[2] * b[2];
   1182             result[3] = a[3] * b[3];
   1183             store_vector4(inst, machine, result);
   1184             if (DEBUG_PROG) {
   1185                printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
   1186                       result[0], result[1], result[2], result[3],
   1187                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
   1188             }
   1189          }
   1190          break;
   1191       case OPCODE_NOISE1:
   1192          {
   1193             GLfloat a[4], result[4];
   1194             fetch_vector1(&inst->SrcReg[0], machine, a);
   1195             result[0] =
   1196                result[1] =
   1197                result[2] =
   1198                result[3] = _mesa_noise1(a[0]);
   1199             store_vector4(inst, machine, result);
   1200          }
   1201          break;
   1202       case OPCODE_NOISE2:
   1203          {
   1204             GLfloat a[4], result[4];
   1205             fetch_vector4(&inst->SrcReg[0], machine, a);
   1206             result[0] =
   1207                result[1] =
   1208                result[2] = result[3] = _mesa_noise2(a[0], a[1]);
   1209             store_vector4(inst, machine, result);
   1210          }
   1211          break;
   1212       case OPCODE_NOISE3:
   1213          {
   1214             GLfloat a[4], result[4];
   1215             fetch_vector4(&inst->SrcReg[0], machine, a);
   1216             result[0] =
   1217                result[1] =
   1218                result[2] =
   1219                result[3] = _mesa_noise3(a[0], a[1], a[2]);
   1220             store_vector4(inst, machine, result);
   1221          }
   1222          break;
   1223       case OPCODE_NOISE4:
   1224          {
   1225             GLfloat a[4], result[4];
   1226             fetch_vector4(&inst->SrcReg[0], machine, a);
   1227             result[0] =
   1228                result[1] =
   1229                result[2] =
   1230                result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
   1231             store_vector4(inst, machine, result);
   1232          }
   1233          break;
   1234       case OPCODE_NOP:
   1235          break;
   1236       case OPCODE_NOT:         /* bitwise NOT */
   1237          {
   1238             GLuint a[4], result[4];
   1239             fetch_vector4ui(&inst->SrcReg[0], machine, a);
   1240             result[0] = ~a[0];
   1241             result[1] = ~a[1];
   1242             result[2] = ~a[2];
   1243             result[3] = ~a[3];
   1244             store_vector4ui(inst, machine, result);
   1245          }
   1246          break;
   1247       case OPCODE_NRM3:        /* 3-component normalization */
   1248          {
   1249             GLfloat a[4], result[4];
   1250             GLfloat tmp;
   1251             fetch_vector4(&inst->SrcReg[0], machine, a);
   1252             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
   1253             if (tmp != 0.0F)
   1254                tmp = INV_SQRTF(tmp);
   1255             result[0] = tmp * a[0];
   1256             result[1] = tmp * a[1];
   1257             result[2] = tmp * a[2];
   1258             result[3] = 0.0;  /* undefined, but prevent valgrind warnings */
   1259             store_vector4(inst, machine, result);
   1260          }
   1261          break;
   1262       case OPCODE_NRM4:        /* 4-component normalization */
   1263          {
   1264             GLfloat a[4], result[4];
   1265             GLfloat tmp;
   1266             fetch_vector4(&inst->SrcReg[0], machine, a);
   1267             tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
   1268             if (tmp != 0.0F)
   1269                tmp = INV_SQRTF(tmp);
   1270             result[0] = tmp * a[0];
   1271             result[1] = tmp * a[1];
   1272             result[2] = tmp * a[2];
   1273             result[3] = tmp * a[3];
   1274             store_vector4(inst, machine, result);
   1275          }
   1276          break;
   1277       case OPCODE_OR:          /* bitwise OR */
   1278          {
   1279             GLuint a[4], b[4], result[4];
   1280             fetch_vector4ui(&inst->SrcReg[0], machine, a);
   1281             fetch_vector4ui(&inst->SrcReg[1], machine, b);
   1282             result[0] = a[0] | b[0];
   1283             result[1] = a[1] | b[1];
   1284             result[2] = a[2] | b[2];
   1285             result[3] = a[3] | b[3];
   1286             store_vector4ui(inst, machine, result);
   1287          }
   1288          break;
   1289       case OPCODE_PK2H:        /* pack two 16-bit floats in one 32-bit float */
   1290          {
   1291             GLfloat a[4];
   1292             GLuint result[4];
   1293             GLhalfNV hx, hy;
   1294             fetch_vector4(&inst->SrcReg[0], machine, a);
   1295             hx = _mesa_float_to_half(a[0]);  /* goes in the low 16 bits */
   1296             hy = _mesa_float_to_half(a[1]);  /* goes in the high 16 bits */
   1297             result[0] =  /* the same packed word is replicated to all components */
   1298             result[1] =
   1299             result[2] =  /* widen before shifting: a 16-bit hy promotes to signed int */
   1300             result[3] = (GLuint) hx | ((GLuint) hy << 16);
   1301             store_vector4ui(inst, machine, result);
   1302          }
   1303          break;
   1304       case OPCODE_PK2US:       /* pack two GLushorts into one 32-bit float */
   1305          {
   1306             GLfloat a[4];
   1307             GLuint result[4], usx, usy;
   1308             fetch_vector4(&inst->SrcReg[0], machine, a);
   1309             a[0] = CLAMP(a[0], 0.0F, 1.0F);
   1310             a[1] = CLAMP(a[1], 0.0F, 1.0F);
   1311             usx = F_TO_I(a[0] * 65535.0F);
   1312             usy = F_TO_I(a[1] * 65535.0F);
   1313             result[0] =
   1314             result[1] =
   1315             result[2] =
   1316             result[3] = usx | (usy << 16);
   1317             store_vector4ui(inst, machine, result);
   1318          }
   1319          break;
   1320       case OPCODE_PK4B:        /* pack four GLbytes into one 32-bit float */
   1321          {
   1322             GLfloat a[4];
   1323             GLuint result[4], ubx, uby, ubz, ubw;
   1324             fetch_vector4(&inst->SrcReg[0], machine, a);
   1325             a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
   1326             a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
   1327             a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
   1328             a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
   1329             ubx = F_TO_I(127.0F * a[0] + 128.0F);
   1330             uby = F_TO_I(127.0F * a[1] + 128.0F);
   1331             ubz = F_TO_I(127.0F * a[2] + 128.0F);
   1332             ubw = F_TO_I(127.0F * a[3] + 128.0F);
   1333             result[0] =
   1334             result[1] =
   1335             result[2] =
   1336             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
   1337             store_vector4ui(inst, machine, result);
   1338          }
   1339          break;
   1340       case OPCODE_PK4UB:       /* pack four GLubytes into one 32-bit float */
   1341          {
   1342             GLfloat a[4];
   1343             GLuint result[4], ubx, uby, ubz, ubw;
   1344             fetch_vector4(&inst->SrcReg[0], machine, a);
   1345             a[0] = CLAMP(a[0], 0.0F, 1.0F);
   1346             a[1] = CLAMP(a[1], 0.0F, 1.0F);
   1347             a[2] = CLAMP(a[2], 0.0F, 1.0F);
   1348             a[3] = CLAMP(a[3], 0.0F, 1.0F);
   1349             ubx = F_TO_I(255.0F * a[0]);
   1350             uby = F_TO_I(255.0F * a[1]);
   1351             ubz = F_TO_I(255.0F * a[2]);
   1352             ubw = F_TO_I(255.0F * a[3]);
   1353             result[0] =
   1354             result[1] =
   1355             result[2] =
   1356             result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
   1357             store_vector4ui(inst, machine, result);
   1358          }
   1359          break;
   1360       case OPCODE_POW:
   1361          {
   1362             GLfloat a[4], b[4], result[4];
   1363             fetch_vector1(&inst->SrcReg[0], machine, a);
   1364             fetch_vector1(&inst->SrcReg[1], machine, b);
   1365             result[0] = result[1] = result[2] = result[3]
   1366                = (GLfloat) pow(a[0], b[0]);
   1367             store_vector4(inst, machine, result);
   1368          }
   1369          break;
   1370       case OPCODE_RCC:  /* clamped reciprocal */
   1371          {  /* computes 1/a[0], then clamps its magnitude to [smallest, largest] */
   1372             const float largest = 1.884467e+19, smallest = 5.42101e-20;  /* NOTE(review): smallest ~= 2^-64 but largest != 2^64 (1.844674e+19) -- confirm */
   1373             GLfloat a[4], r, result[4];
   1374             fetch_vector1(&inst->SrcReg[0], machine, a);
   1375             if (DEBUG_PROG) {  /* debug trace only; does not alter the result */
   1376                if (a[0] == 0)
   1377                   printf("RCC(0)\n");
   1378                else if (IS_INF_OR_NAN(a[0]))
   1379                   printf("RCC(inf)\n");
   1380             }
   1381             if (a[0] == 1.0F) {
   1382                r = 1.0F;
   1383             }
   1384             else {
   1385                r = 1.0F / a[0];
   1386             }
   1387             if (positive(r)) {  /* positive() is defined elsewhere; presumably a sign test -- verify */
   1388                if (r > largest) {
   1389                   r = largest;
   1390                }
   1391                else if (r < smallest) {
   1392                   r = smallest;
   1393                }
   1394             }
   1395             else {  /* mirror of the branch above, using the negated bounds */
   1396                if (r < -largest) {
   1397                   r = -largest;
   1398                }
   1399                else if (r > -smallest) {
   1400                   r = -smallest;
   1401                }
   1402             }
   1403             result[0] = result[1] = result[2] = result[3] = r;  /* scalar result replicated */
   1404             store_vector4(inst, machine, result);
   1405          }
   1406          break;
   1407 
   1408       case OPCODE_RCP:
   1409          {
   1410             GLfloat a[4], result[4];
   1411             fetch_vector1(&inst->SrcReg[0], machine, a);
   1412             if (DEBUG_PROG) {
   1413                if (a[0] == 0)
   1414                   printf("RCP(0)\n");
   1415                else if (IS_INF_OR_NAN(a[0]))
   1416                   printf("RCP(inf)\n");
   1417             }
   1418             result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
   1419             store_vector4(inst, machine, result);
   1420          }
   1421          break;
   1422       case OPCODE_RET:         /* return from subroutine (conditional) */
   1423          if (eval_condition(machine, inst)) {
   1424             if (machine->StackDepth == 0) {
   1425                return GL_TRUE;  /* Per GL_NV_vertex_program2 spec */
   1426             }
   1427             /* subtract one because of pc++ in the for loop */
   1428             pc = machine->CallStack[--machine->StackDepth] - 1;
   1429          }
   1430          break;
   1431       case OPCODE_RFL:         /* reflection vector */
   1432          {
   1433             GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
   1434             fetch_vector4(&inst->SrcReg[0], machine, axis);
   1435             fetch_vector4(&inst->SrcReg[1], machine, dir);
   1436             tmpW = DOT3(axis, axis);  /* squared length of the axis */
   1437             tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
   1438             result[0] = tmpX * axis[0] - dir[0];
   1439             result[1] = tmpX * axis[1] - dir[1];
   1440             result[2] = tmpX * axis[2] - dir[2];
   1441             result[3] = 0.0F;  /* W is undefined for RFL (XXX enforce in parser!); zeroed so no uninitialized value reaches store_vector4 */
   1442             store_vector4(inst, machine, result);
   1443          }
   1444          break;
   1445       case OPCODE_RSQ:         /* 1 / sqrt() */
   1446          {
   1447             GLfloat a[4], result[4];
   1448             fetch_vector1(&inst->SrcReg[0], machine, a);
   1449             a[0] = FABSF(a[0]);
   1450             result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
   1451             store_vector4(inst, machine, result);
   1452             if (DEBUG_PROG) {
   1453                printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
   1454             }
   1455          }
   1456          break;
   1457       case OPCODE_SCS:         /* sine and cos */
   1458          {
   1459             GLfloat a[4], result[4];
   1460             fetch_vector1(&inst->SrcReg[0], machine, a);
   1461             result[0] = (GLfloat) cos(a[0]);
   1462             result[1] = (GLfloat) sin(a[0]);
   1463             result[2] = 0.0;    /* undefined! */
   1464             result[3] = 0.0;    /* undefined! */
   1465             store_vector4(inst, machine, result);
   1466          }
   1467          break;
   1468       case OPCODE_SEQ:         /* set on equal */
   1469          {
   1470             GLfloat a[4], b[4], result[4];
   1471             fetch_vector4(&inst->SrcReg[0], machine, a);
   1472             fetch_vector4(&inst->SrcReg[1], machine, b);
   1473             result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
   1474             result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
   1475             result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
   1476             result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
   1477             store_vector4(inst, machine, result);
   1478             if (DEBUG_PROG) {
   1479                printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
   1480                       result[0], result[1], result[2], result[3],
   1481                       a[0], a[1], a[2], a[3],
   1482                       b[0], b[1], b[2], b[3]);
   1483             }
   1484          }
   1485          break;
   1486       case OPCODE_SFL:         /* set false, operands ignored */
   1487          {
   1488             static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
   1489             store_vector4(inst, machine, result);
   1490          }
   1491          break;
   1492       case OPCODE_SGE:         /* set on greater or equal */
   1493          {
   1494             GLfloat a[4], b[4], result[4];
   1495             fetch_vector4(&inst->SrcReg[0], machine, a);
   1496             fetch_vector4(&inst->SrcReg[1], machine, b);
   1497             result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
   1498             result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
   1499             result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
   1500             result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
   1501             store_vector4(inst, machine, result);
   1502             if (DEBUG_PROG) {
   1503                printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
   1504                       result[0], result[1], result[2], result[3],
   1505                       a[0], a[1], a[2], a[3],
   1506                       b[0], b[1], b[2], b[3]);
   1507             }
   1508          }
   1509          break;
   1510       case OPCODE_SGT:         /* set on greater */
   1511          {
   1512             GLfloat a[4], b[4], result[4];
   1513             fetch_vector4(&inst->SrcReg[0], machine, a);
   1514             fetch_vector4(&inst->SrcReg[1], machine, b);
   1515             result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
   1516             result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
   1517             result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
   1518             result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
   1519             store_vector4(inst, machine, result);
   1520             if (DEBUG_PROG) {
   1521                printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
   1522                       result[0], result[1], result[2], result[3],
   1523                       a[0], a[1], a[2], a[3],
   1524                       b[0], b[1], b[2], b[3]);
   1525             }
   1526          }
   1527          break;
   1528       case OPCODE_SIN:
   1529          {
   1530             GLfloat a[4], result[4];
   1531             fetch_vector1(&inst->SrcReg[0], machine, a);
   1532             result[0] = result[1] = result[2] = result[3]
   1533                = (GLfloat) sin(a[0]);
   1534             store_vector4(inst, machine, result);
   1535          }
   1536          break;
   1537       case OPCODE_SLE:         /* set on less or equal */
   1538          {
   1539             GLfloat a[4], b[4], result[4];
   1540             fetch_vector4(&inst->SrcReg[0], machine, a);
   1541             fetch_vector4(&inst->SrcReg[1], machine, b);
   1542             result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
   1543             result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
   1544             result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
   1545             result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
   1546             store_vector4(inst, machine, result);
   1547             if (DEBUG_PROG) {
   1548                printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
   1549                       result[0], result[1], result[2], result[3],
   1550                       a[0], a[1], a[2], a[3],
   1551                       b[0], b[1], b[2], b[3]);
   1552             }
   1553          }
   1554          break;
   1555       case OPCODE_SLT:         /* set on less */
   1556          {
   1557             GLfloat a[4], b[4], result[4];
   1558             fetch_vector4(&inst->SrcReg[0], machine, a);
   1559             fetch_vector4(&inst->SrcReg[1], machine, b);
   1560             result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
   1561             result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
   1562             result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
   1563             result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
   1564             store_vector4(inst, machine, result);
   1565             if (DEBUG_PROG) {
   1566                printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
   1567                       result[0], result[1], result[2], result[3],
   1568                       a[0], a[1], a[2], a[3],
   1569                       b[0], b[1], b[2], b[3]);
   1570             }
   1571          }
   1572          break;
   1573       case OPCODE_SNE:         /* set on not equal */
   1574          {
   1575             GLfloat a[4], b[4], result[4];
   1576             fetch_vector4(&inst->SrcReg[0], machine, a);
   1577             fetch_vector4(&inst->SrcReg[1], machine, b);
   1578             result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
   1579             result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
   1580             result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
   1581             result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
   1582             store_vector4(inst, machine, result);
   1583             if (DEBUG_PROG) {
   1584                printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
   1585                       result[0], result[1], result[2], result[3],
   1586                       a[0], a[1], a[2], a[3],
   1587                       b[0], b[1], b[2], b[3]);
   1588             }
   1589          }
   1590          break;
   1591       case OPCODE_SSG:         /* set sign (-1, 0 or +1) */
   1592          {
   1593             GLfloat a[4], result[4];
   1594             fetch_vector4(&inst->SrcReg[0], machine, a);
   1595             result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
   1596             result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
   1597             result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
   1598             result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
   1599             store_vector4(inst, machine, result);
   1600          }
   1601          break;
   1602       case OPCODE_STR:         /* set true, operands ignored */
   1603          {
   1604             static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
   1605             store_vector4(inst, machine, result);
   1606          }
   1607          break;
   1608       case OPCODE_SUB:
   1609          {
   1610             GLfloat a[4], b[4], result[4];
   1611             fetch_vector4(&inst->SrcReg[0], machine, a);
   1612             fetch_vector4(&inst->SrcReg[1], machine, b);
   1613             result[0] = a[0] - b[0];
   1614             result[1] = a[1] - b[1];
   1615             result[2] = a[2] - b[2];
   1616             result[3] = a[3] - b[3];
   1617             store_vector4(inst, machine, result);
   1618             if (DEBUG_PROG) {
   1619                printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
   1620                       result[0], result[1], result[2], result[3],
   1621                       a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
   1622             }
   1623          }
   1624          break;
   1625       case OPCODE_SWZ:         /* extended swizzle */
   1626          {
   1627             const struct prog_src_register *source = &inst->SrcReg[0];
   1628             const GLfloat *src = get_src_register_pointer(source, machine);
   1629             GLfloat result[4];
   1630             GLuint i;
   1631             for (i = 0; i < 4; i++) {
   1632                const GLuint swz = GET_SWZ(source->Swizzle, i);
   1633                if (swz == SWIZZLE_ZERO)
   1634                   result[i] = 0.0;
   1635                else if (swz == SWIZZLE_ONE)
   1636                   result[i] = 1.0;
   1637                else {
   1638                   ASSERT(swz >= 0);
   1639                   ASSERT(swz <= 3);
   1640                   result[i] = src[swz];
   1641                }
   1642                if (source->Negate & (1 << i))
   1643                   result[i] = -result[i];
   1644             }
   1645             store_vector4(inst, machine, result);
   1646          }
   1647          break;
   1648       case OPCODE_TEX:         /* Both ARB and NV frag prog */
   1649          /* Simple texel lookup */
   1650          {
   1651             GLfloat texcoord[4], color[4];
   1652             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1653 
   1654             /* For TEX, texcoord.Q should not be used and its value should not
   1655              * matter (at most, we pass coord.xyz to texture3D() in GLSL).
   1656              * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
   1657              * which is effectively what happens when the texcoord swizzle
   1658              * is .xyzz
   1659              */
   1660             texcoord[3] = 1.0f;
   1661 
   1662             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
   1663 
   1664             if (DEBUG_PROG) {
   1665                printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
   1666                       color[0], color[1], color[2], color[3],
   1667                       inst->TexSrcUnit,
   1668                       texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
   1669             }
   1670             store_vector4(inst, machine, color);
   1671          }
   1672          break;
   1673       case OPCODE_TXB:         /* GL_ARB_fragment_program only */
   1674          /* Texel lookup with LOD bias */
   1675          {
   1676             GLfloat texcoord[4], color[4], lodBias;
   1677 
   1678             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1679 
   1680             /* texcoord[3] is the bias to add to lambda */
   1681             lodBias = texcoord[3];
   1682 
   1683             fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
   1684 
   1685             if (DEBUG_PROG) {
   1686                printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
   1687                       "  bias %g\n",
   1688                       color[0], color[1], color[2], color[3],
   1689                       inst->TexSrcUnit,
   1690                       texcoord[0],
   1691                       texcoord[1],
   1692                       texcoord[2],
   1693                       texcoord[3],
   1694                       lodBias);
   1695             }
   1696 
   1697             store_vector4(inst, machine, color);
   1698          }
   1699          break;
   1700       case OPCODE_TXD:         /* GL_NV_fragment_program only */
   1701          /* Texture lookup w/ partial derivatives for LOD */
   1702          {
   1703             GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
   1704             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1705             fetch_vector4(&inst->SrcReg[1], machine, dtdx);
   1706             fetch_vector4(&inst->SrcReg[2], machine, dtdy);
   1707             machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
   1708                                      0.0, /* lodBias */
   1709                                      inst->TexSrcUnit, color);
   1710             store_vector4(inst, machine, color);
   1711          }
   1712          break;
   1713       case OPCODE_TXL:
   1714          /* Texel lookup with explicit LOD */
   1715          {
   1716             GLfloat texcoord[4], color[4], lod;
   1717 
   1718             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1719 
   1720             /* texcoord[3] is the LOD */
   1721             lod = texcoord[3];
   1722 
   1723 	    machine->FetchTexelLod(ctx, texcoord, lod,
   1724 				   machine->Samplers[inst->TexSrcUnit], color);
   1725 
   1726             store_vector4(inst, machine, color);
   1727          }
   1728          break;
   1729       case OPCODE_TXP:         /* GL_ARB_fragment_program only */
   1730          /* Texture lookup w/ projective divide */
   1731          {
   1732             GLfloat texcoord[4], color[4];
   1733 
   1734             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1735             /* Not so sure about this test - if texcoord[3] is
   1736              * zero, we'd probably be fine except for an ASSERT in
   1737              * IROUND_POS() which gets triggered by the inf values created.
   1738              */
   1739             if (texcoord[3] != 0.0) {
   1740                texcoord[0] /= texcoord[3];
   1741                texcoord[1] /= texcoord[3];
   1742                texcoord[2] /= texcoord[3];
   1743             }
   1744 
   1745             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
   1746 
   1747             store_vector4(inst, machine, color);
   1748          }
   1749          break;
   1750       case OPCODE_TXP_NV:      /* GL_NV_fragment_program only */
   1751          /* Texture lookup w/ projective divide, as above, but do not
   1752           * do the divide by w if sampling from a cube map.
   1753           */
   1754          {
   1755             GLfloat texcoord[4], color[4];
   1756 
   1757             fetch_vector4(&inst->SrcReg[0], machine, texcoord);
   1758             if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
   1759                 texcoord[3] != 0.0) {
   1760                texcoord[0] /= texcoord[3];
   1761                texcoord[1] /= texcoord[3];
   1762                texcoord[2] /= texcoord[3];
   1763             }
   1764 
   1765             fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
   1766 
   1767             store_vector4(inst, machine, color);
   1768          }
   1769          break;
   1770       case OPCODE_TRUNC:       /* truncate toward zero */
   1771          {
   1772             GLfloat a[4], result[4];
   1773             fetch_vector4(&inst->SrcReg[0], machine, a);
   1774             result[0] = (GLfloat) (GLint) a[0];
   1775             result[1] = (GLfloat) (GLint) a[1];
   1776             result[2] = (GLfloat) (GLint) a[2];
   1777             result[3] = (GLfloat) (GLint) a[3];
   1778             store_vector4(inst, machine, result);
   1779          }
   1780          break;
   1781       case OPCODE_UP2H:        /* unpack two 16-bit floats */
   1782          {
   1783             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
   1784             GLfloat result[4];
   1785             GLushort hx, hy;
   1786             hx = raw & 0xffff;
   1787             hy = raw >> 16;
   1788             result[0] = result[2] = _mesa_half_to_float(hx);
   1789             result[1] = result[3] = _mesa_half_to_float(hy);
   1790             store_vector4(inst, machine, result);
   1791          }
   1792          break;
   1793       case OPCODE_UP2US:       /* unpack two GLushorts */
   1794          {
   1795             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
   1796             GLfloat result[4];
   1797             GLushort usx, usy;
   1798             usx = raw & 0xffff;
   1799             usy = raw >> 16;
   1800             result[0] = result[2] = usx * (1.0f / 65535.0f);
   1801             result[1] = result[3] = usy * (1.0f / 65535.0f);
   1802             store_vector4(inst, machine, result);
   1803          }
   1804          break;
   1805       case OPCODE_UP4B:        /* unpack four GLbytes */
   1806          {
   1807             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
   1808             GLfloat result[4];
   1809             result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
   1810             result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
   1811             result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
   1812             result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
   1813             store_vector4(inst, machine, result);
   1814          }
   1815          break;
   1816       case OPCODE_UP4UB:       /* unpack four GLubytes */
   1817          {
   1818             const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
   1819             GLfloat result[4];
   1820             result[0] = ((raw >> 0) & 0xff) / 255.0F;
   1821             result[1] = ((raw >> 8) & 0xff) / 255.0F;
   1822             result[2] = ((raw >> 16) & 0xff) / 255.0F;
   1823             result[3] = ((raw >> 24) & 0xff) / 255.0F;
   1824             store_vector4(inst, machine, result);
   1825          }
   1826          break;
   1827       case OPCODE_XOR:         /* bitwise XOR */
   1828          {
   1829             GLuint a[4], b[4], result[4];
   1830             fetch_vector4ui(&inst->SrcReg[0], machine, a);
   1831             fetch_vector4ui(&inst->SrcReg[1], machine, b);
   1832             result[0] = a[0] ^ b[0];
   1833             result[1] = a[1] ^ b[1];
   1834             result[2] = a[2] ^ b[2];
   1835             result[3] = a[3] ^ b[3];
   1836             store_vector4ui(inst, machine, result);
   1837          }
   1838          break;
   1839       case OPCODE_XPD:         /* cross product */
   1840          {
   1841             GLfloat a[4], b[4], result[4];
   1842             fetch_vector4(&inst->SrcReg[0], machine, a);
   1843             fetch_vector4(&inst->SrcReg[1], machine, b);
   1844             result[0] = a[1] * b[2] - a[2] * b[1];
   1845             result[1] = a[2] * b[0] - a[0] * b[2];
   1846             result[2] = a[0] * b[1] - a[1] * b[0];
   1847             result[3] = 1.0;
   1848             store_vector4(inst, machine, result);
   1849             if (DEBUG_PROG) {
   1850                printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
   1851                       result[0], result[1], result[2], result[3],
   1852                       a[0], a[1], a[2], b[0], b[1], b[2]);
   1853             }
   1854          }
   1855          break;
   1856       case OPCODE_X2D:         /* 2-D matrix transform */
   1857          {
   1858             GLfloat a[4], b[4], c[4], result[4];
   1859             fetch_vector4(&inst->SrcReg[0], machine, a);
   1860             fetch_vector4(&inst->SrcReg[1], machine, b);
   1861             fetch_vector4(&inst->SrcReg[2], machine, c);
   1862             result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
   1863             result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
   1864             result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
   1865             result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
   1866             store_vector4(inst, machine, result);
   1867          }
   1868          break;
   1869       case OPCODE_PRINT:
   1870          {
   1871             if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
   1872                GLfloat a[4];
   1873                fetch_vector4(&inst->SrcReg[0], machine, a);
   1874                printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
   1875                             a[0], a[1], a[2], a[3]);
   1876             }
   1877             else {
   1878                printf("%s\n", (const char *) inst->Data);
   1879             }
   1880          }
   1881          break;
   1882       case OPCODE_END:
   1883          return GL_TRUE;
   1884       default:
   1885          _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
   1886                        inst->Opcode);
   1887          return GL_TRUE;        /* return value doesn't matter */
   1888       }
   1889 
   1890       numExec++;
   1891       if (numExec > maxExec) {
   1892 	 static GLboolean reported = GL_FALSE;
   1893 	 if (!reported) {
   1894 	    _mesa_problem(ctx, "Infinite loop detected in fragment program");
   1895 	    reported = GL_TRUE;
   1896 	 }
   1897          return GL_TRUE;
   1898       }
   1899 
   1900    } /* for pc */
   1901 
   1902    return GL_TRUE;
   1903 }
   1904