Home | History | Annotate | Download | only in program
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "main/glheader.h"
     25 #include "main/context.h"
     26 #include "main/macros.h"
     27 #include "program.h"
     28 #include "prog_instruction.h"
     29 #include "prog_optimize.h"
     30 #include "prog_parameter.h"
     31 #include <stdbool.h>
     32 
     33 static bool
     34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
     35 {
     36    unsigned i;
     37 
     38    for (i = 0; i < num_srcs; i++) {
     39       if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
     40 	 return false;
     41       if (inst->SrcReg[i].RelAddr)
     42          return false;
     43    }
     44 
     45    return true;
     46 }
     47 
     48 static struct prog_src_register
     49 src_reg_for_float(struct gl_program *prog, float val)
     50 {
     51    struct prog_src_register src;
     52    unsigned swiz;
     53 
     54    memset(&src, 0, sizeof(src));
     55 
     56    src.File = PROGRAM_CONSTANT;
     57    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
     58 					  (gl_constant_value *) &val, 1, &swiz);
     59    src.Swizzle = swiz;
     60    return src;
     61 }
     62 
     63 static struct prog_src_register
     64 src_reg_for_vec4(struct gl_program *prog, const float *val)
     65 {
     66    struct prog_src_register src;
     67    unsigned swiz;
     68 
     69    memset(&src, 0, sizeof(src));
     70 
     71    src.File = PROGRAM_CONSTANT;
     72    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
     73 					  (gl_constant_value *) val, 4, &swiz);
     74    src.Swizzle = swiz;
     75    return src;
     76 }
     77 
     78 static bool
     79 src_regs_are_same(const struct prog_src_register *a,
     80 		  const struct prog_src_register *b)
     81 {
     82    return (a->File == b->File)
     83       && (a->Index == b->Index)
     84       && (a->Swizzle == b->Swizzle)
     85       && (a->Negate == b->Negate)
     86       && (a->RelAddr == 0)
     87       && (b->RelAddr == 0);
     88 }
     89 
     90 static void
     91 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
     92 {
     93    const gl_constant_value *const value =
     94       prog->Parameters->ParameterValues[r->Index];
     95 
     96    data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
     97    data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
     98    data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
     99    data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
    100 
    101    if (r->Negate & 0x01) {
    102       data[0] = -data[0];
    103    }
    104 
    105    if (r->Negate & 0x02) {
    106       data[1] = -data[1];
    107    }
    108 
    109    if (r->Negate & 0x04) {
    110       data[2] = -data[2];
    111    }
    112 
    113    if (r->Negate & 0x08) {
    114       data[3] = -data[3];
    115    }
    116 }
    117 
    118 /**
    119  * Try to replace instructions that produce a constant result with simple moves
    120  *
    121  * The hope is that a following copy propagation pass will eliminate the
    122  * unnecessary move instructions.
    123  */
    124 GLboolean
    125 _mesa_constant_fold(struct gl_program *prog)
    126 {
    127    bool progress = false;
    128    unsigned i;
    129 
    130    for (i = 0; i < prog->arb.NumInstructions; i++) {
    131       struct prog_instruction *const inst = &prog->arb.Instructions[i];
    132 
    133       switch (inst->Opcode) {
    134       case OPCODE_ADD:
    135 	 if (src_regs_are_constant(inst, 2)) {
    136 	    float a[4];
    137 	    float b[4];
    138 	    float result[4];
    139 
    140 	    get_value(prog, &inst->SrcReg[0], a);
    141 	    get_value(prog, &inst->SrcReg[1], b);
    142 
    143 	    result[0] = a[0] + b[0];
    144 	    result[1] = a[1] + b[1];
    145 	    result[2] = a[2] + b[2];
    146 	    result[3] = a[3] + b[3];
    147 
    148 	    inst->Opcode = OPCODE_MOV;
    149 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    150 
    151 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    152 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    153 
    154 	    progress = true;
    155 	 }
    156 	 break;
    157 
    158       case OPCODE_CMP:
    159 	 /* FINISHME: We could also optimize CMP instructions where the first
    160 	  * FINISHME: source is a constant that is either all < 0.0 or all
    161 	  * FINISHME: >= 0.0.
    162 	  */
    163 	 if (src_regs_are_constant(inst, 3)) {
    164 	    float a[4];
    165 	    float b[4];
    166 	    float c[4];
    167 	    float result[4];
    168 
    169 	    get_value(prog, &inst->SrcReg[0], a);
    170 	    get_value(prog, &inst->SrcReg[1], b);
    171 	    get_value(prog, &inst->SrcReg[2], c);
    172 
    173             result[0] = a[0] < 0.0f ? b[0] : c[0];
    174             result[1] = a[1] < 0.0f ? b[1] : c[1];
    175             result[2] = a[2] < 0.0f ? b[2] : c[2];
    176             result[3] = a[3] < 0.0f ? b[3] : c[3];
    177 
    178 	    inst->Opcode = OPCODE_MOV;
    179 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    180 
    181 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    182 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    183 	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
    184 	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
    185 
    186 	    progress = true;
    187 	 }
    188 	 break;
    189 
    190       case OPCODE_DP2:
    191       case OPCODE_DP3:
    192       case OPCODE_DP4:
    193 	 if (src_regs_are_constant(inst, 2)) {
    194 	    float a[4];
    195 	    float b[4];
    196 	    float result;
    197 
    198 	    get_value(prog, &inst->SrcReg[0], a);
    199 	    get_value(prog, &inst->SrcReg[1], b);
    200 
    201 	    result = (a[0] * b[0]) + (a[1] * b[1]);
    202 
    203 	    if (inst->Opcode >= OPCODE_DP3)
    204 	       result += a[2] * b[2];
    205 
    206 	    if (inst->Opcode == OPCODE_DP4)
    207 	       result += a[3] * b[3];
    208 
    209 	    inst->Opcode = OPCODE_MOV;
    210 	    inst->SrcReg[0] = src_reg_for_float(prog, result);
    211 
    212 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    213 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    214 
    215 	    progress = true;
    216 	 }
    217 	 break;
    218 
    219       case OPCODE_MUL:
    220 	 if (src_regs_are_constant(inst, 2)) {
    221 	    float a[4];
    222 	    float b[4];
    223 	    float result[4];
    224 
    225 	    get_value(prog, &inst->SrcReg[0], a);
    226 	    get_value(prog, &inst->SrcReg[1], b);
    227 
    228 	    result[0] = a[0] * b[0];
    229 	    result[1] = a[1] * b[1];
    230 	    result[2] = a[2] * b[2];
    231 	    result[3] = a[3] * b[3];
    232 
    233 	    inst->Opcode = OPCODE_MOV;
    234 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    235 
    236 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    237 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    238 
    239 	    progress = true;
    240 	 }
    241 	 break;
    242 
    243       case OPCODE_SGE:
    244 	 if (src_regs_are_constant(inst, 2)) {
    245 	    float a[4];
    246 	    float b[4];
    247 	    float result[4];
    248 
    249 	    get_value(prog, &inst->SrcReg[0], a);
    250 	    get_value(prog, &inst->SrcReg[1], b);
    251 
    252 	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
    253 	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
    254 	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
    255 	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
    256 
    257 	    inst->Opcode = OPCODE_MOV;
    258 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    259 
    260 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    261 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    262 
    263 	    progress = true;
    264 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    265 	    inst->Opcode = OPCODE_MOV;
    266 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
    267 
    268 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    269 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    270 
    271 	    progress = true;
    272 	 }
    273 	 break;
    274 
    275       case OPCODE_SLT:
    276 	 if (src_regs_are_constant(inst, 2)) {
    277 	    float a[4];
    278 	    float b[4];
    279 	    float result[4];
    280 
    281 	    get_value(prog, &inst->SrcReg[0], a);
    282 	    get_value(prog, &inst->SrcReg[1], b);
    283 
    284 	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
    285 	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
    286 	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
    287 	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
    288 
    289 	    inst->Opcode = OPCODE_MOV;
    290 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    291 
    292 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    293 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    294 
    295 	    progress = true;
    296 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    297 	    inst->Opcode = OPCODE_MOV;
    298 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
    299 
    300 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    301 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    302 
    303 	    progress = true;
    304 	 }
    305 	 break;
    306 
    307       default:
    308 	 break;
    309       }
    310    }
    311 
    312    return progress;
    313 }
    314