1 /* 2 * Copyright 2009 Nicolai Hhnle <nhaehnle (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 22 23 #include "radeon_compiler.h" 24 25 #include <stdarg.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 29 #include "radeon_dataflow.h" 30 #include "radeon_program.h" 31 #include "radeon_program_pair.h" 32 #include "radeon_compiler_util.h" 33 34 35 void rc_init(struct radeon_compiler * c) 36 { 37 memset(c, 0, sizeof(*c)); 38 39 memory_pool_init(&c->Pool); 40 c->Program.Instructions.Prev = &c->Program.Instructions; 41 c->Program.Instructions.Next = &c->Program.Instructions; 42 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; 43 } 44 45 void rc_destroy(struct radeon_compiler * c) 46 { 47 rc_constants_destroy(&c->Program.Constants); 48 memory_pool_destroy(&c->Pool); 49 free(c->ErrorMsg); 50 } 51 52 void rc_debug(struct radeon_compiler * c, const char * fmt, ...) 53 { 54 va_list ap; 55 56 if (!(c->Debug & RC_DBG_LOG)) 57 return; 58 59 va_start(ap, fmt); 60 vfprintf(stderr, fmt, ap); 61 va_end(ap); 62 } 63 64 void rc_error(struct radeon_compiler * c, const char * fmt, ...) 65 { 66 va_list ap; 67 68 c->Error = 1; 69 70 if (!c->ErrorMsg) { 71 /* Only remember the first error */ 72 char buf[1024]; 73 int written; 74 75 va_start(ap, fmt); 76 written = vsnprintf(buf, sizeof(buf), fmt, ap); 77 va_end(ap); 78 79 if (written < sizeof(buf)) { 80 c->ErrorMsg = strdup(buf); 81 } else { 82 c->ErrorMsg = malloc(written + 1); 83 84 va_start(ap, fmt); 85 vsnprintf(c->ErrorMsg, written + 1, fmt, ap); 86 va_end(ap); 87 } 88 } 89 90 if (c->Debug & RC_DBG_LOG) { 91 fprintf(stderr, "r300compiler error: "); 92 93 va_start(ap, fmt); 94 vfprintf(stderr, fmt, ap); 95 va_end(ap); 96 } 97 } 98 99 int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) 100 { 101 rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); 102 return 1; 103 } 104 105 /** 106 * Recompute c->Program.InputsRead and c->Program.OutputsWritten 107 * based on which inputs and outputs are actually referenced 108 * in program instructions. 109 */ 110 void rc_calculate_inputs_outputs(struct radeon_compiler * c) 111 { 112 struct rc_instruction *inst; 113 114 c->Program.InputsRead = 0; 115 c->Program.OutputsWritten = 0; 116 117 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) 118 { 119 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 120 int i; 121 122 for (i = 0; i < opcode->NumSrcRegs; ++i) { 123 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) 124 c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; 125 } 126 127 if (opcode->HasDstReg) { 128 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) 129 c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; 130 } 131 } 132 } 133 134 /** 135 * Rewrite the program such that everything that source the given input 136 * register will source new_input instead. 137 */ 138 void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) 139 { 140 struct rc_instruction * inst; 141 142 c->Program.InputsRead &= ~(1 << input); 143 144 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 145 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 146 unsigned i; 147 148 for(i = 0; i < opcode->NumSrcRegs; ++i) { 149 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { 150 inst->U.I.SrcReg[i].File = new_input.File; 151 inst->U.I.SrcReg[i].Index = new_input.Index; 152 inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); 153 if (!inst->U.I.SrcReg[i].Abs) { 154 inst->U.I.SrcReg[i].Negate ^= new_input.Negate; 155 inst->U.I.SrcReg[i].Abs = new_input.Abs; 156 } 157 158 c->Program.InputsRead |= 1 << new_input.Index; 159 } 160 } 161 } 162 } 163 164 165 /** 166 * Rewrite the program such that everything that writes into the given 167 * output register will instead write to new_output. The new_output 168 * writemask is honoured. 169 */ 170 void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) 171 { 172 struct rc_instruction * inst; 173 174 c->Program.OutputsWritten &= ~(1 << output); 175 176 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 177 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 178 179 if (opcode->HasDstReg) { 180 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { 181 inst->U.I.DstReg.Index = new_output; 182 inst->U.I.DstReg.WriteMask &= writemask; 183 184 c->Program.OutputsWritten |= 1 << new_output; 185 } 186 } 187 } 188 } 189 190 191 /** 192 * Rewrite the program such that a given output is duplicated. 193 */ 194 void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) 195 { 196 unsigned tempreg = rc_find_free_temporary(c); 197 struct rc_instruction * inst; 198 199 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 200 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 201 202 if (opcode->HasDstReg) { 203 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { 204 inst->U.I.DstReg.File = RC_FILE_TEMPORARY; 205 inst->U.I.DstReg.Index = tempreg; 206 } 207 } 208 } 209 210 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); 211 inst->U.I.Opcode = RC_OPCODE_MOV; 212 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 213 inst->U.I.DstReg.Index = output; 214 215 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 216 inst->U.I.SrcReg[0].Index = tempreg; 217 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 218 219 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); 220 inst->U.I.Opcode = RC_OPCODE_MOV; 221 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 222 inst->U.I.DstReg.Index = dup_output; 223 224 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 225 inst->U.I.SrcReg[0].Index = tempreg; 226 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 227 228 c->Program.OutputsWritten |= 1 << dup_output; 229 } 230 231 232 /** 233 * Introduce standard code fragment to deal with fragment.position. 234 */ 235 void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, 236 int full_vtransform) 237 { 238 unsigned tempregi = rc_find_free_temporary(c); 239 struct rc_instruction * inst_rcp; 240 struct rc_instruction * inst_mul; 241 struct rc_instruction * inst_mad; 242 struct rc_instruction * inst; 243 244 c->Program.InputsRead &= ~(1 << wpos); 245 c->Program.InputsRead |= 1 << new_input; 246 247 /* perspective divide */ 248 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); 249 inst_rcp->U.I.Opcode = RC_OPCODE_RCP; 250 251 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; 252 inst_rcp->U.I.DstReg.Index = tempregi; 253 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; 254 255 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; 256 inst_rcp->U.I.SrcReg[0].Index = new_input; 257 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; 258 259 inst_mul = rc_insert_new_instruction(c, inst_rcp); 260 inst_mul->U.I.Opcode = RC_OPCODE_MUL; 261 262 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; 263 inst_mul->U.I.DstReg.Index = tempregi; 264 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; 265 266 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; 267 inst_mul->U.I.SrcReg[0].Index = new_input; 268 269 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; 270 inst_mul->U.I.SrcReg[1].Index = tempregi; 271 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; 272 273 /* viewport transformation */ 274 inst_mad = rc_insert_new_instruction(c, inst_mul); 275 inst_mad->U.I.Opcode = RC_OPCODE_MAD; 276 277 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; 278 inst_mad->U.I.DstReg.Index = tempregi; 279 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; 280 281 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 282 inst_mad->U.I.SrcReg[0].Index = tempregi; 283 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; 284 285 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; 286 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; 287 288 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; 289 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; 290 291 if (full_vtransform) { 292 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); 293 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); 294 } else { 295 inst_mad->U.I.SrcReg[1].Index = 296 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); 297 } 298 299 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { 300 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 301 unsigned i; 302 303 for(i = 0; i < opcode->NumSrcRegs; i++) { 304 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 305 inst->U.I.SrcReg[i].Index == wpos) { 306 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 307 inst->U.I.SrcReg[i].Index = tempregi; 308 } 309 } 310 } 311 } 312 313 314 /** 315 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. 316 * Gallium and OpenGL define it the other way around. 317 * 318 * So let's just negate FACE at the beginning of the shader and rewrite the rest 319 * of the shader to read from the newly allocated temporary. 320 */ 321 void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) 322 { 323 unsigned tempregi = rc_find_free_temporary(c); 324 struct rc_instruction *inst_add; 325 struct rc_instruction *inst; 326 327 /* perspective divide */ 328 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); 329 inst_add->U.I.Opcode = RC_OPCODE_ADD; 330 331 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; 332 inst_add->U.I.DstReg.Index = tempregi; 333 inst_add->U.I.DstReg.WriteMask = RC_MASK_X; 334 335 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; 336 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; 337 338 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; 339 inst_add->U.I.SrcReg[1].Index = face; 340 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; 341 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; 342 343 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { 344 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 345 unsigned i; 346 347 for(i = 0; i < opcode->NumSrcRegs; i++) { 348 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 349 inst->U.I.SrcReg[i].Index == face) { 350 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 351 inst->U.I.SrcReg[i].Index = tempregi; 352 } 353 } 354 } 355 } 356 357 static void reg_count_callback(void * userdata, struct rc_instruction * inst, 358 rc_register_file file, unsigned int index, unsigned int mask) 359 { 360 struct rc_program_stats *s = userdata; 361 if (file == RC_FILE_TEMPORARY) 362 (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; 363 if (file == RC_FILE_INLINE) 364 s->num_inline_literals++; 365 } 366 367 void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) 368 { 369 struct rc_instruction * tmp; 370 memset(s, 0, sizeof(*s)); 371 372 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; 373 tmp = tmp->Next){ 374 const struct rc_opcode_info * info; 375 rc_for_all_reads_mask(tmp, reg_count_callback, s); 376 if (tmp->Type == RC_INSTRUCTION_NORMAL) { 377 info = rc_get_opcode_info(tmp->U.I.Opcode); 378 if (info->Opcode == RC_OPCODE_BEGIN_TEX) 379 continue; 380 if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) 381 s->num_presub_ops++; 382 } else { 383 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) 384 s->num_presub_ops++; 385 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) 386 s->num_presub_ops++; 387 /* Assuming alpha will never be a flow control or 388 * a tex instruction. */ 389 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) 390 s->num_alpha_insts++; 391 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) 392 s->num_rgb_insts++; 393 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && 394 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { 395 s->num_omod_ops++; 396 } 397 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && 398 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { 399 s->num_omod_ops++; 400 } 401 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); 402 } 403 if (info->IsFlowControl) 404 s->num_fc_insts++; 405 if (info->HasTexture) 406 s->num_tex_insts++; 407 s->num_insts++; 408 } 409 /* Increment here because the reg_count_callback store the max 410 * temporary reg index in s->nun_temp_regs. */ 411 s->num_temp_regs++; 412 } 413 414 static void print_stats(struct radeon_compiler * c) 415 { 416 struct rc_program_stats s; 417 418 if (c->initial_num_insts <= 5) 419 return; 420 421 rc_get_stats(c, &s); 422 423 switch (c->type) { 424 case RC_VERTEX_PROGRAM: 425 fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" 426 "~%4u Instructions\n" 427 "~%4u Flow Control Instructions\n" 428 "~%4u Temporary Registers\n" 429 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", 430 s.num_insts, s.num_fc_insts, s.num_temp_regs); 431 break; 432 433 case RC_FRAGMENT_PROGRAM: 434 fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" 435 "~%4u Instructions\n" 436 "~%4u Vector Instructions (RGB)\n" 437 "~%4u Scalar Instructions (Alpha)\n" 438 "~%4u Flow Control Instructions\n" 439 "~%4u Texture Instructions\n" 440 "~%4u Presub Operations\n" 441 "~%4u OMOD Operations\n" 442 "~%4u Temporary Registers\n" 443 "~%4u Inline Literals\n" 444 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", 445 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, 446 s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, 447 s.num_omod_ops, s.num_temp_regs, s.num_inline_literals); 448 break; 449 default: 450 assert(0); 451 } 452 } 453 454 static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { 455 "Vertex Program", 456 "Fragment Program" 457 }; 458 459 void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) 460 { 461 for (unsigned i = 0; list[i].name; i++) { 462 if (list[i].predicate) { 463 list[i].run(c, list[i].user); 464 465 if (c->Error) 466 return; 467 468 if ((c->Debug & RC_DBG_LOG) && list[i].dump) { 469 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); 470 rc_print_program(&c->Program); 471 } 472 } 473 } 474 } 475 476 /* Executes a list of compiler passes given in the parameter 'list'. */ 477 void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) 478 { 479 struct rc_program_stats s; 480 481 rc_get_stats(c, &s); 482 c->initial_num_insts = s.num_insts; 483 484 if (c->Debug & RC_DBG_LOG) { 485 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); 486 rc_print_program(&c->Program); 487 } 488 489 rc_run_compiler_passes(c, list); 490 491 if (c->Debug & RC_DBG_STATS) 492 print_stats(c); 493 } 494 495 void rc_validate_final_shader(struct radeon_compiler *c, void *user) 496 { 497 /* Check the number of constants. */ 498 if (c->Program.Constants.Count > c->max_constants) { 499 rc_error(c, "Too many constants. Max: %i, Got: %i\n", 500 c->max_constants, c->Program.Constants.Count); 501 } 502 } 503