1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "main/glheader.h" 25 #include "main/context.h" 26 #include "main/macros.h" 27 #include "program.h" 28 #include "prog_instruction.h" 29 #include "prog_optimize.h" 30 #include "prog_parameter.h" 31 #include <stdbool.h> 32 33 static bool 34 src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs) 35 { 36 unsigned i; 37 38 for (i = 0; i < num_srcs; i++) { 39 if (inst->SrcReg[i].File != PROGRAM_CONSTANT) 40 return false; 41 } 42 43 return true; 44 } 45 46 static struct prog_src_register 47 src_reg_for_float(struct gl_program *prog, float val) 48 { 49 struct prog_src_register src; 50 unsigned swiz; 51 52 memset(&src, 0, sizeof(src)); 53 54 src.File = PROGRAM_CONSTANT; 55 src.Index = _mesa_add_unnamed_constant(prog->Parameters, 56 (gl_constant_value *) &val, 1, &swiz); 57 src.Swizzle = swiz; 58 return src; 59 } 60 61 static struct prog_src_register 62 src_reg_for_vec4(struct gl_program *prog, const float *val) 63 { 64 struct prog_src_register src; 65 unsigned swiz; 66 67 memset(&src, 0, sizeof(src)); 68 69 src.File = PROGRAM_CONSTANT; 70 src.Index = _mesa_add_unnamed_constant(prog->Parameters, 71 (gl_constant_value *) val, 4, &swiz); 72 src.Swizzle = swiz; 73 return src; 74 } 75 76 static bool 77 src_regs_are_same(const struct prog_src_register *a, 78 const struct prog_src_register *b) 79 { 80 return (a->File == b->File) 81 && (a->Index == b->Index) 82 && (a->Swizzle == b->Swizzle) 83 && (a->Abs == b->Abs) 84 && (a->Negate == b->Negate) 85 && (a->RelAddr == 0) 86 && (b->RelAddr == 0); 87 } 88 89 static void 90 get_value(struct gl_program *prog, struct prog_src_register *r, float *data) 91 { 92 const gl_constant_value *const value = 93 prog->Parameters->ParameterValues[r->Index]; 94 95 data[0] = value[GET_SWZ(r->Swizzle, 0)].f; 96 data[1] = value[GET_SWZ(r->Swizzle, 1)].f; 97 data[2] = value[GET_SWZ(r->Swizzle, 2)].f; 98 data[3] = value[GET_SWZ(r->Swizzle, 3)].f; 99 100 if (r->Abs) { 101 data[0] = fabsf(data[0]); 102 data[1] = fabsf(data[1]); 103 data[2] = fabsf(data[2]); 104 data[3] = fabsf(data[3]); 105 } 106 107 if (r->Negate & 0x01) { 108 data[0] = -data[0]; 109 } 110 111 if (r->Negate & 0x02) { 112 data[1] = -data[1]; 113 } 114 115 if (r->Negate & 0x04) { 116 data[2] = -data[2]; 117 } 118 119 if (r->Negate & 0x08) { 120 data[3] = -data[3]; 121 } 122 } 123 124 /** 125 * Try to replace instructions that produce a constant result with simple moves 126 * 127 * The hope is that a following copy propagation pass will eliminate the 128 * unnecessary move instructions. 129 */ 130 GLboolean 131 _mesa_constant_fold(struct gl_program *prog) 132 { 133 bool progress = false; 134 unsigned i; 135 136 for (i = 0; i < prog->NumInstructions; i++) { 137 struct prog_instruction *const inst = &prog->Instructions[i]; 138 139 switch (inst->Opcode) { 140 case OPCODE_ADD: 141 if (src_regs_are_constant(inst, 2)) { 142 float a[4]; 143 float b[4]; 144 float result[4]; 145 146 get_value(prog, &inst->SrcReg[0], a); 147 get_value(prog, &inst->SrcReg[1], b); 148 149 result[0] = a[0] + b[0]; 150 result[1] = a[1] + b[1]; 151 result[2] = a[2] + b[2]; 152 result[3] = a[3] + b[3]; 153 154 inst->Opcode = OPCODE_MOV; 155 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 156 157 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 158 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 159 160 progress = true; 161 } 162 break; 163 164 case OPCODE_CMP: 165 /* FINISHME: We could also optimize CMP instructions where the first 166 * FINISHME: source is a constant that is either all < 0.0 or all 167 * FINISHME: >= 0.0. 168 */ 169 if (src_regs_are_constant(inst, 3)) { 170 float a[4]; 171 float b[4]; 172 float c[4]; 173 float result[4]; 174 175 get_value(prog, &inst->SrcReg[0], a); 176 get_value(prog, &inst->SrcReg[1], b); 177 get_value(prog, &inst->SrcReg[2], c); 178 179 result[0] = a[0] < 0.0f ? b[0] : c[0]; 180 result[1] = a[1] < 0.0f ? b[1] : c[1]; 181 result[2] = a[2] < 0.0f ? b[2] : c[2]; 182 result[3] = a[3] < 0.0f ? b[3] : c[3]; 183 184 inst->Opcode = OPCODE_MOV; 185 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 186 187 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 188 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 189 inst->SrcReg[2].File = PROGRAM_UNDEFINED; 190 inst->SrcReg[2].Swizzle = SWIZZLE_NOOP; 191 192 progress = true; 193 } 194 break; 195 196 case OPCODE_DP2: 197 case OPCODE_DP3: 198 case OPCODE_DP4: 199 if (src_regs_are_constant(inst, 2)) { 200 float a[4]; 201 float b[4]; 202 float result; 203 204 get_value(prog, &inst->SrcReg[0], a); 205 get_value(prog, &inst->SrcReg[1], b); 206 207 /* It seems like a loop could be used here, but we cleverly put 208 * DP2A between DP2 and DP3. Subtracting DP2 (or similar) from 209 * the opcode results in various failures of the loop control. 210 */ 211 result = (a[0] * b[0]) + (a[1] * b[1]); 212 213 if (inst->Opcode >= OPCODE_DP3) 214 result += a[2] * b[2]; 215 216 if (inst->Opcode == OPCODE_DP4) 217 result += a[3] * b[3]; 218 219 inst->Opcode = OPCODE_MOV; 220 inst->SrcReg[0] = src_reg_for_float(prog, result); 221 222 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 223 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 224 225 progress = true; 226 } 227 break; 228 229 case OPCODE_MUL: 230 if (src_regs_are_constant(inst, 2)) { 231 float a[4]; 232 float b[4]; 233 float result[4]; 234 235 get_value(prog, &inst->SrcReg[0], a); 236 get_value(prog, &inst->SrcReg[1], b); 237 238 result[0] = a[0] * b[0]; 239 result[1] = a[1] * b[1]; 240 result[2] = a[2] * b[2]; 241 result[3] = a[3] * b[3]; 242 243 inst->Opcode = OPCODE_MOV; 244 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 245 246 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 247 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 248 249 progress = true; 250 } 251 break; 252 253 case OPCODE_SEQ: 254 if (src_regs_are_constant(inst, 2)) { 255 float a[4]; 256 float b[4]; 257 float result[4]; 258 259 get_value(prog, &inst->SrcReg[0], a); 260 get_value(prog, &inst->SrcReg[1], b); 261 262 result[0] = (a[0] == b[0]) ? 1.0f : 0.0f; 263 result[1] = (a[1] == b[1]) ? 1.0f : 0.0f; 264 result[2] = (a[2] == b[2]) ? 1.0f : 0.0f; 265 result[3] = (a[3] == b[3]) ? 1.0f : 0.0f; 266 267 inst->Opcode = OPCODE_MOV; 268 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 269 270 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 271 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 272 273 progress = true; 274 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 275 inst->Opcode = OPCODE_MOV; 276 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); 277 278 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 279 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 280 281 progress = true; 282 } 283 break; 284 285 case OPCODE_SGE: 286 if (src_regs_are_constant(inst, 2)) { 287 float a[4]; 288 float b[4]; 289 float result[4]; 290 291 get_value(prog, &inst->SrcReg[0], a); 292 get_value(prog, &inst->SrcReg[1], b); 293 294 result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f; 295 result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f; 296 result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f; 297 result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f; 298 299 inst->Opcode = OPCODE_MOV; 300 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 301 302 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 303 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 304 305 progress = true; 306 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 307 inst->Opcode = OPCODE_MOV; 308 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); 309 310 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 311 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 312 313 progress = true; 314 } 315 break; 316 317 case OPCODE_SGT: 318 if (src_regs_are_constant(inst, 2)) { 319 float a[4]; 320 float b[4]; 321 float result[4]; 322 323 get_value(prog, &inst->SrcReg[0], a); 324 get_value(prog, &inst->SrcReg[1], b); 325 326 result[0] = (a[0] > b[0]) ? 1.0f : 0.0f; 327 result[1] = (a[1] > b[1]) ? 1.0f : 0.0f; 328 result[2] = (a[2] > b[2]) ? 1.0f : 0.0f; 329 result[3] = (a[3] > b[3]) ? 1.0f : 0.0f; 330 331 inst->Opcode = OPCODE_MOV; 332 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 333 334 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 335 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 336 337 progress = true; 338 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 339 inst->Opcode = OPCODE_MOV; 340 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); 341 342 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 343 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 344 345 progress = true; 346 } 347 break; 348 349 case OPCODE_SLE: 350 if (src_regs_are_constant(inst, 2)) { 351 float a[4]; 352 float b[4]; 353 float result[4]; 354 355 get_value(prog, &inst->SrcReg[0], a); 356 get_value(prog, &inst->SrcReg[1], b); 357 358 result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f; 359 result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f; 360 result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f; 361 result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f; 362 363 inst->Opcode = OPCODE_MOV; 364 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 365 366 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 367 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 368 369 progress = true; 370 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 371 inst->Opcode = OPCODE_MOV; 372 inst->SrcReg[0] = src_reg_for_float(prog, 1.0f); 373 374 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 375 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 376 377 progress = true; 378 } 379 break; 380 381 case OPCODE_SLT: 382 if (src_regs_are_constant(inst, 2)) { 383 float a[4]; 384 float b[4]; 385 float result[4]; 386 387 get_value(prog, &inst->SrcReg[0], a); 388 get_value(prog, &inst->SrcReg[1], b); 389 390 result[0] = (a[0] < b[0]) ? 1.0f : 0.0f; 391 result[1] = (a[1] < b[1]) ? 1.0f : 0.0f; 392 result[2] = (a[2] < b[2]) ? 1.0f : 0.0f; 393 result[3] = (a[3] < b[3]) ? 1.0f : 0.0f; 394 395 inst->Opcode = OPCODE_MOV; 396 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 397 398 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 399 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 400 401 progress = true; 402 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 403 inst->Opcode = OPCODE_MOV; 404 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); 405 406 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 407 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 408 409 progress = true; 410 } 411 break; 412 413 case OPCODE_SNE: 414 if (src_regs_are_constant(inst, 2)) { 415 float a[4]; 416 float b[4]; 417 float result[4]; 418 419 get_value(prog, &inst->SrcReg[0], a); 420 get_value(prog, &inst->SrcReg[1], b); 421 422 result[0] = (a[0] != b[0]) ? 1.0f : 0.0f; 423 result[1] = (a[1] != b[1]) ? 1.0f : 0.0f; 424 result[2] = (a[2] != b[2]) ? 1.0f : 0.0f; 425 result[3] = (a[3] != b[3]) ? 1.0f : 0.0f; 426 427 inst->Opcode = OPCODE_MOV; 428 inst->SrcReg[0] = src_reg_for_vec4(prog, result); 429 430 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 431 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 432 433 progress = true; 434 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) { 435 inst->Opcode = OPCODE_MOV; 436 inst->SrcReg[0] = src_reg_for_float(prog, 0.0f); 437 438 inst->SrcReg[1].File = PROGRAM_UNDEFINED; 439 inst->SrcReg[1].Swizzle = SWIZZLE_NOOP; 440 441 progress = true; 442 } 443 break; 444 445 default: 446 break; 447 } 448 } 449 450 return progress; 451 } 452