1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "main/glheader.h" 25 #include "main/context.h" 26 #include "main/macros.h" 27 #include "program.h" 28 #include "prog_instruction.h" 29 #include "prog_optimize.h" 30 #include "prog_parameter.h" 31 #include <stdbool.h> 32 33 static bool 34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) 35 { 36 unsigned i; 37 38 for (i = 0; i < num_srcs; i++) { 39 if (inst->SrcReg[i].File != PROGRAM_CONSTANT) 40 return false; 41 if (inst->SrcReg[i].RelAddr) 42 return false; 43 } 44 45 return true; 46 } 47 48 static struct prog_src_register 49 src_reg_for_float(struct gl_program *prog, float val) 50 { 51 struct prog_src_register src; 52 unsigned swiz; 53 54 memset(&src, 0, sizeof(src)); 55 56 src.File = PROGRAM_CONSTANT; 57 src.Index = _mesa_add_unnamed_constant(prog->Parameters, 58 (gl_constant_value *) &val, 1, &swiz); 59 src.Swizzle = swiz; 60 return src; 61 } 62 63 static struct prog_src_register 64 src_reg_for_vec4(struct gl_program *prog, const float *val) 65 { 66 struct prog_src_register src; 67 unsigned swiz; 68 69 memset(&src, 0, sizeof(src)); 70 71 src.File = PROGRAM_CONSTANT; 72 src.Index = _mesa_add_unnamed_constant(prog->Parameters, 73 (gl_constant_value *) val, 4, &swiz); 74 src.Swizzle = swiz; 75 return src; 76 } 77 78 static bool 79 src_regs_are_same(const struct prog_src_register *a, 80 const struct prog_src_register *b) 81 { 82 return (a->File == b->File) 83 && (a->Index == b->Index) 84 && (a->Swizzle == b->Swizzle) 85 && (a->Negate == b->Negate) 86 && (a->RelAddr == 0) 87 && (b->RelAddr == 0); 88 } 89 90 static void 91 get_value(struct gl_program *prog, struct prog_src_register *r, float *data) 92 { 93 const gl_constant_value *const value = 94 prog->Parameters->ParameterValues[r->Index]; 95 96 data[0] = value[GET_SWZ(r->Swizzle, 0)].f; 97 data[1] = value[GET_SWZ(r->Swizzle, 1)].f; 98 data[2] = value[GET_SWZ(r->Swizzle, 2)].f; 99 data[3] = value[GET_SWZ(r->Swizzle, 3)].f; 100 101 if (r->Negate & 0x01) { 102 data[0] = -data[0]; 103 } 104 105 if (r->Negate & 0x02) { 106 data[1] = -data[1]; 107 } 108 109 if (r->Negate & 0x04) { 110 data[2] = -data[2]; 111 } 112 113 if (r->Negate & 0x08) { 114 data[3] = -data[3]; 115 } 116 } 117 118 /** 119 * Try to replace instructions that produce a constant result with simple moves 120 * 121 * The hope is that a following copy propagation pass will eliminate the 122 * unnecessary move instructions. 123 */ 124 GLboolean 125 _mesa_constant_fold(struct gl_program *prog) 126 { 127 bool progress = false; 128 unsigned i; 129 130 for (i = 0; i < prog->arb.NumInstructions; i++) { 131 struct prog_instruction *const inst = &prog->arb.Instructions[i]; 132 133 switch (inst->Opcode) { 134 case OPCODE_ADD: 135 if (src_regs_are_constant(inst, 2)) { 136 float a[4]; 137 float b[4]; 138 float result[4]; 139 140 get_value(prog, &inst->SrcReg[0], a); 141 get_value(prog, &inst->SrcReg[1], b); 142 143 result[0] = a[0] + b[0]; 144 result[1] = a[1] + b[1]; 145 result[2] = a[2] + b[2]; 146 result[3] = a[3] + b[3]; 147 148 inst->Opcode = OPCODE_MOV; 149 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 150 151 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 152 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 153 154 progress = true; 155 } 156 break; 157 158 case OPCODE_CMP: 159 /* FINISHME: We could also optimize CMP instructions where the first 160 * FINISHME: source is a constant that is either all < 0.0 or all 161 * FINISHME: >= 0.0. 162 */ 163 if (src_regs_are_constant(inst, 3)) { 164 float a[4]; 165 float b[4]; 166 float c[4]; 167 float result[4]; 168 169 get_value(prog, &inst->SrcReg[0], a); 170 get_value(prog, &inst->SrcReg[1], b); 171 get_value(prog, &inst->SrcReg[2], c); 172 173 result[0] = a[0] < 0.0f ? b[0] : c[0]; 174 result[1] = a[1] < 0.0f ? b[1] : c[1]; 175 result[2] = a[2] < 0.0f ? b[2] : c[2]; 176 result[3] = a[3] < 0.0f ? b[3] : c[3]; 177 178 inst->Opcode = OPCODE_MOV; 179 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 180 181 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 182 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 183 inst->SrcReg[2].File = PROGRAM_UNDEFINED; 184 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; 185 186 progress = true; 187 } 188 break; 189 190 case OPCODE_DP2: 191 case OPCODE_DP3: 192 case OPCODE_DP4: 193 if (src_regs_are_constant(inst, 2)) { 194 float a[4]; 195 float b[4]; 196 float result; 197 198 get_value(prog, &inst->SrcReg[0], a); 199 get_value(prog, &inst->SrcReg[1], b); 200 201 result = (a[0] * b[0]) + (a[1] * b[1]); 202 203 if (inst->Opcode >= OPCODE_DP3) 204 result += a[2] * b[2]; 205 206 if (inst->Opcode == OPCODE_DP4) 207 result += a[3] * b[3]; 208 209 inst->Opcode = OPCODE_MOV; 210 inst->SrcReg[0] = src_reg_for_float(prog, result); 211 212 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 213 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 214 215 progress = true; 216 } 217 break; 218 219 case OPCODE_MUL: 220 if (src_regs_are_constant(inst, 2)) { 221 float a[4]; 222 float b[4]; 223 float result[4]; 224 225 get_value(prog, &inst->SrcReg[0], a); 226 get_value(prog, &inst->SrcReg[1], b); 227 228 result[0] = a[0] * b[0]; 229 result[1] = a[1] * b[1]; 230 result[2] = a[2] * b[2]; 231 result[3] = a[3] * b[3]; 232 233 inst->Opcode = OPCODE_MOV; 234 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 235 236 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 237 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 238 239 progress = true; 240 } 241 break; 242 243 case OPCODE_SGE: 244 if (src_regs_are_constant(inst, 2)) { 245 float a[4]; 246 float b[4]; 247 float result[4]; 248 249 get_value(prog, &inst->SrcReg[0], a); 250 get_value(prog, &inst->SrcReg[1], b); 251 252 result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; 253 result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; 254 result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; 255 result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; 256 257 inst->Opcode = OPCODE_MOV; 258 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 259 260 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 261 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 262 263 progress = true; 264 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 265 inst->Opcode = OPCODE_MOV; 266 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); 267 268 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 269 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 270 271 progress = true; 272 } 273 break; 274 275 case OPCODE_SLT: 276 if (src_regs_are_constant(inst, 2)) { 277 float a[4]; 278 float b[4]; 279 float result[4]; 280 281 get_value(prog, &inst->SrcReg[0], a); 282 get_value(prog, &inst->SrcReg[1], b); 283 284 result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; 285 result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; 286 result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; 287 result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; 288 289 inst->Opcode = OPCODE_MOV; 290 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 291 292 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 293 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 294 295 progress = true; 296 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 297 inst->Opcode = OPCODE_MOV; 298 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); 299 300 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 301 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 302 303 progress = true; 304 } 305 break; 306 307 default: 308 break; 309 } 310 } 311 312 return progress; 313 } 314