Home | History | Annotate | Download | only in program
      1 /*
 * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "main/glheader.h"
     25 #include "main/context.h"
     26 #include "main/macros.h"
     27 #include "program.h"
     28 #include "prog_instruction.h"
     29 #include "prog_optimize.h"
     30 #include "prog_parameter.h"
     31 #include <stdbool.h>
     32 
     33 static bool
     34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
     35 {
     36    unsigned i;
     37 
     38    for (i = 0; i < num_srcs; i++) {
     39       if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
     40 	 return false;
     41    }
     42 
     43    return true;
     44 }
     45 
     46 static struct prog_src_register
     47 src_reg_for_float(struct gl_program *prog, float val)
     48 {
     49    struct prog_src_register src;
     50    unsigned swiz;
     51 
     52    memset(&src, 0, sizeof(src));
     53 
     54    src.File = PROGRAM_CONSTANT;
     55    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
     56 					  (gl_constant_value *) &val, 1, &swiz);
     57    src.Swizzle = swiz;
     58    return src;
     59 }
     60 
     61 static struct prog_src_register
     62 src_reg_for_vec4(struct gl_program *prog, const float *val)
     63 {
     64    struct prog_src_register src;
     65    unsigned swiz;
     66 
     67    memset(&src, 0, sizeof(src));
     68 
     69    src.File = PROGRAM_CONSTANT;
     70    src.Index = _mesa_add_unnamed_constant(prog->Parameters,
     71 					  (gl_constant_value *) val, 4, &swiz);
     72    src.Swizzle = swiz;
     73    return src;
     74 }
     75 
     76 static bool
     77 src_regs_are_same(const struct prog_src_register *a,
     78 		  const struct prog_src_register *b)
     79 {
     80    return (a->File == b->File)
     81       && (a->Index == b->Index)
     82       && (a->Swizzle == b->Swizzle)
     83       && (a->Abs == b->Abs)
     84       && (a->Negate == b->Negate)
     85       && (a->RelAddr == 0)
     86       && (b->RelAddr == 0);
     87 }
     88 
     89 static void
     90 get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
     91 {
     92    const gl_constant_value *const value =
     93       prog->Parameters->ParameterValues[r->Index];
     94 
     95    data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
     96    data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
     97    data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
     98    data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
     99 
    100    if (r->Abs) {
    101       data[0] = fabsf(data[0]);
    102       data[1] = fabsf(data[1]);
    103       data[2] = fabsf(data[2]);
    104       data[3] = fabsf(data[3]);
    105    }
    106 
    107    if (r->Negate & 0x01) {
    108       data[0] = -data[0];
    109    }
    110 
    111    if (r->Negate & 0x02) {
    112       data[1] = -data[1];
    113    }
    114 
    115    if (r->Negate & 0x04) {
    116       data[2] = -data[2];
    117    }
    118 
    119    if (r->Negate & 0x08) {
    120       data[3] = -data[3];
    121    }
    122 }
    123 
    124 /**
    125  * Try to replace instructions that produce a constant result with simple moves
    126  *
    127  * The hope is that a following copy propagation pass will eliminate the
    128  * unnecessary move instructions.
    129  */
    130 GLboolean
    131 _mesa_constant_fold(struct gl_program *prog)
    132 {
    133    bool progress = false;
    134    unsigned i;
    135 
    136    for (i = 0; i < prog->NumInstructions; i++) {
    137       struct prog_instruction *const inst = &prog->Instructions[i];
    138 
    139       switch (inst->Opcode) {
    140       case OPCODE_ADD:
    141 	 if (src_regs_are_constant(inst, 2)) {
    142 	    float a[4];
    143 	    float b[4];
    144 	    float result[4];
    145 
    146 	    get_value(prog, &inst->SrcReg[0], a);
    147 	    get_value(prog, &inst->SrcReg[1], b);
    148 
    149 	    result[0] = a[0] + b[0];
    150 	    result[1] = a[1] + b[1];
    151 	    result[2] = a[2] + b[2];
    152 	    result[3] = a[3] + b[3];
    153 
    154 	    inst->Opcode = OPCODE_MOV;
    155 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    156 
    157 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    158 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    159 
    160 	    progress = true;
    161 	 }
    162 	 break;
    163 
    164       case OPCODE_CMP:
    165 	 /* FINISHME: We could also optimize CMP instructions where the first
    166 	  * FINISHME: source is a constant that is either all < 0.0 or all
    167 	  * FINISHME: >= 0.0.
    168 	  */
    169 	 if (src_regs_are_constant(inst, 3)) {
    170 	    float a[4];
    171 	    float b[4];
    172 	    float c[4];
    173 	    float result[4];
    174 
    175 	    get_value(prog, &inst->SrcReg[0], a);
    176 	    get_value(prog, &inst->SrcReg[1], b);
    177 	    get_value(prog, &inst->SrcReg[2], c);
    178 
    179             result[0] = a[0] < 0.0f ? b[0] : c[0];
    180             result[1] = a[1] < 0.0f ? b[1] : c[1];
    181             result[2] = a[2] < 0.0f ? b[2] : c[2];
    182             result[3] = a[3] < 0.0f ? b[3] : c[3];
    183 
    184 	    inst->Opcode = OPCODE_MOV;
    185 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    186 
    187 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    188 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    189 	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
    190 	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
    191 
    192 	    progress = true;
    193 	 }
    194 	 break;
    195 
    196       case OPCODE_DP2:
    197       case OPCODE_DP3:
    198       case OPCODE_DP4:
    199 	 if (src_regs_are_constant(inst, 2)) {
    200 	    float a[4];
    201 	    float b[4];
    202 	    float result;
    203 
    204 	    get_value(prog, &inst->SrcReg[0], a);
    205 	    get_value(prog, &inst->SrcReg[1], b);
    206 
    207 	    /* It seems like a loop could be used here, but we cleverly put
    208 	     * DP2A between DP2 and DP3.  Subtracting DP2 (or similar) from
    209 	     * the opcode results in various failures of the loop control.
    210 	     */
    211 	    result = (a[0] * b[0]) + (a[1] * b[1]);
    212 
    213 	    if (inst->Opcode >= OPCODE_DP3)
    214 	       result += a[2] * b[2];
    215 
    216 	    if (inst->Opcode == OPCODE_DP4)
    217 	       result += a[3] * b[3];
    218 
    219 	    inst->Opcode = OPCODE_MOV;
    220 	    inst->SrcReg[0] = src_reg_for_float(prog, result);
    221 
    222 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    223 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    224 
    225 	    progress = true;
    226 	 }
    227 	 break;
    228 
    229       case OPCODE_MUL:
    230 	 if (src_regs_are_constant(inst, 2)) {
    231 	    float a[4];
    232 	    float b[4];
    233 	    float result[4];
    234 
    235 	    get_value(prog, &inst->SrcReg[0], a);
    236 	    get_value(prog, &inst->SrcReg[1], b);
    237 
    238 	    result[0] = a[0] * b[0];
    239 	    result[1] = a[1] * b[1];
    240 	    result[2] = a[2] * b[2];
    241 	    result[3] = a[3] * b[3];
    242 
    243 	    inst->Opcode = OPCODE_MOV;
    244 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    245 
    246 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    247 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    248 
    249 	    progress = true;
    250 	 }
    251 	 break;
    252 
    253       case OPCODE_SEQ:
    254 	 if (src_regs_are_constant(inst, 2)) {
    255 	    float a[4];
    256 	    float b[4];
    257 	    float result[4];
    258 
    259 	    get_value(prog, &inst->SrcReg[0], a);
    260 	    get_value(prog, &inst->SrcReg[1], b);
    261 
    262 	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
    263 	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
    264 	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
    265 	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
    266 
    267 	    inst->Opcode = OPCODE_MOV;
    268 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    269 
    270 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    271 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    272 
    273 	    progress = true;
    274 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    275 	    inst->Opcode = OPCODE_MOV;
    276 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
    277 
    278 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    279 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    280 
    281 	    progress = true;
    282 	 }
    283 	 break;
    284 
    285       case OPCODE_SGE:
    286 	 if (src_regs_are_constant(inst, 2)) {
    287 	    float a[4];
    288 	    float b[4];
    289 	    float result[4];
    290 
    291 	    get_value(prog, &inst->SrcReg[0], a);
    292 	    get_value(prog, &inst->SrcReg[1], b);
    293 
    294 	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
    295 	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
    296 	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
    297 	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
    298 
    299 	    inst->Opcode = OPCODE_MOV;
    300 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    301 
    302 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    303 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    304 
    305 	    progress = true;
    306 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    307 	    inst->Opcode = OPCODE_MOV;
    308 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
    309 
    310 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    311 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    312 
    313 	    progress = true;
    314 	 }
    315 	 break;
    316 
    317       case OPCODE_SGT:
    318 	 if (src_regs_are_constant(inst, 2)) {
    319 	    float a[4];
    320 	    float b[4];
    321 	    float result[4];
    322 
    323 	    get_value(prog, &inst->SrcReg[0], a);
    324 	    get_value(prog, &inst->SrcReg[1], b);
    325 
    326 	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
    327 	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
    328 	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
    329 	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
    330 
    331 	    inst->Opcode = OPCODE_MOV;
    332 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    333 
    334 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    335 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    336 
    337 	    progress = true;
    338 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    339 	    inst->Opcode = OPCODE_MOV;
    340 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
    341 
    342 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    343 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    344 
    345 	    progress = true;
    346 	 }
    347 	 break;
    348 
    349       case OPCODE_SLE:
    350 	 if (src_regs_are_constant(inst, 2)) {
    351 	    float a[4];
    352 	    float b[4];
    353 	    float result[4];
    354 
    355 	    get_value(prog, &inst->SrcReg[0], a);
    356 	    get_value(prog, &inst->SrcReg[1], b);
    357 
    358 	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
    359 	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
    360 	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
    361 	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
    362 
    363 	    inst->Opcode = OPCODE_MOV;
    364 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    365 
    366 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    367 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    368 
    369 	    progress = true;
    370 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    371 	    inst->Opcode = OPCODE_MOV;
    372 	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
    373 
    374 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    375 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    376 
    377 	    progress = true;
    378 	 }
    379 	 break;
    380 
    381       case OPCODE_SLT:
    382 	 if (src_regs_are_constant(inst, 2)) {
    383 	    float a[4];
    384 	    float b[4];
    385 	    float result[4];
    386 
    387 	    get_value(prog, &inst->SrcReg[0], a);
    388 	    get_value(prog, &inst->SrcReg[1], b);
    389 
    390 	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
    391 	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
    392 	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
    393 	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
    394 
    395 	    inst->Opcode = OPCODE_MOV;
    396 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    397 
    398 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    399 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    400 
    401 	    progress = true;
    402 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    403 	    inst->Opcode = OPCODE_MOV;
    404 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
    405 
    406 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    407 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    408 
    409 	    progress = true;
    410 	 }
    411 	 break;
    412 
    413       case OPCODE_SNE:
    414 	 if (src_regs_are_constant(inst, 2)) {
    415 	    float a[4];
    416 	    float b[4];
    417 	    float result[4];
    418 
    419 	    get_value(prog, &inst->SrcReg[0], a);
    420 	    get_value(prog, &inst->SrcReg[1], b);
    421 
    422 	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
    423 	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
    424 	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
    425 	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
    426 
    427 	    inst->Opcode = OPCODE_MOV;
    428 	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
    429 
    430 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    431 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    432 
    433 	    progress = true;
    434 	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
    435 	    inst->Opcode = OPCODE_MOV;
    436 	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
    437 
    438 	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
    439 	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
    440 
    441 	    progress = true;
    442 	 }
    443 	 break;
    444 
    445       default:
    446 	 break;
    447       }
    448    }
    449 
    450    return progress;
    451 }
    452