1 /* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2012 Advanced Micro Devices, Inc. 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 * Authors: 28 * Nicolai Haehnle 29 * Tom Stellard <thomas.stellard (at) amd.com> 30 */ 31 32 #include "radeon_dataflow.h" 33 34 #include "radeon_code.h" 35 #include "radeon_compiler.h" 36 #include "radeon_compiler_util.h" 37 #include "radeon_swizzle.h" 38 39 40 static void rewrite_source(struct radeon_compiler * c, 41 struct rc_instruction * inst, unsigned src) 42 { 43 struct rc_swizzle_split split; 44 unsigned int tempreg = rc_find_free_temporary(c); 45 unsigned int usemask; 46 47 usemask = 0; 48 for(unsigned int chan = 0; chan < 4; ++chan) { 49 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED) 50 usemask |= 1 << chan; 51 } 52 53 c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split); 54 55 for(unsigned int phase = 0; phase < split.NumPhases; ++phase) { 56 struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev); 57 unsigned int phase_refmask; 58 unsigned int masked_negate; 59 60 mov->U.I.Opcode = RC_OPCODE_MOV; 61 mov->U.I.DstReg.File = RC_FILE_TEMPORARY; 62 mov->U.I.DstReg.Index = tempreg; 63 mov->U.I.DstReg.WriteMask = split.Phase[phase]; 64 mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src]; 65 mov->U.I.PreSub = inst->U.I.PreSub; 66 67 phase_refmask = 0; 68 for(unsigned int chan = 0; chan < 4; ++chan) { 69 if (!GET_BIT(split.Phase[phase], chan)) 70 SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED); 71 else 72 phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan); 73 } 74 75 phase_refmask &= RC_MASK_XYZW; 76 77 masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate; 78 if (masked_negate == 0) 79 mov->U.I.SrcReg[0].Negate = 0; 80 else if (masked_negate == split.Phase[phase]) 81 mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW; 82 83 } 84 85 inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY; 86 inst->U.I.SrcReg[src].Index = tempreg; 87 inst->U.I.SrcReg[src].Swizzle = 0; 88 inst->U.I.SrcReg[src].Negate = RC_MASK_NONE; 89 inst->U.I.SrcReg[src].Abs = 0; 90 for(unsigned int chan = 0; chan < 4; ++chan) { 91 SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, 92 GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED); 93 } 94 } 95 96 /** 97 * This function will attempt to rewrite non-native swizzles that read from 98 * immediate registers by rearranging the immediates to allow the 99 * instruction to use native swizzles. 100 */ 101 static unsigned try_rewrite_constant(struct radeon_compiler *c, 102 struct rc_src_register *reg) 103 { 104 unsigned new_swizzle, chan, swz0, swz1, swz2, swz3, found_swizzle, swz; 105 unsigned all_inline = 0; 106 float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f}; 107 108 if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) { 109 /* The register does not contain immediates, but if all 110 * the swizzles are inline constants, we can still rewrite 111 * it. */ 112 113 new_swizzle = RC_SWIZZLE_XYZW; 114 for (chan = 0 ; chan < 4; chan++) { 115 unsigned swz = GET_SWZ(reg->Swizzle, chan); 116 if (swz <= RC_SWIZZLE_W) { 117 return 0; 118 } 119 if (swz == RC_SWIZZLE_UNUSED) { 120 SET_SWZ(new_swizzle, chan, RC_SWIZZLE_UNUSED); 121 } 122 } 123 all_inline = 1; 124 } else { 125 new_swizzle = reg->Swizzle; 126 } 127 128 swz = RC_SWIZZLE_UNUSED; 129 found_swizzle = 1; 130 /* Check if all channels have the same swizzle. If they do we can skip 131 * the search for a native swizzle. We only need to check the first 132 * three channels, because any swizzle is legal in the fourth channel. 133 */ 134 for (chan = 0; chan < 3; chan++) { 135 unsigned chan_swz = GET_SWZ(reg->Swizzle, chan); 136 if (chan_swz == RC_SWIZZLE_UNUSED) { 137 continue; 138 } 139 if (swz == RC_SWIZZLE_UNUSED) { 140 swz = chan_swz; 141 } else if (swz != chan_swz) { 142 found_swizzle = 0; 143 break; 144 } 145 } 146 147 /* Find a legal swizzle */ 148 149 /* This loop attempts to find a native swizzle where all the 150 * channels are different. */ 151 while (!found_swizzle && !all_inline) { 152 swz0 = GET_SWZ(new_swizzle, 0); 153 swz1 = GET_SWZ(new_swizzle, 1); 154 swz2 = GET_SWZ(new_swizzle, 2); 155 156 /* Swizzle .W. is never legal. */ 157 if (swz1 == RC_SWIZZLE_W || 158 swz1 == RC_SWIZZLE_UNUSED || 159 swz1 == RC_SWIZZLE_ZERO || 160 swz1 == RC_SWIZZLE_HALF || 161 swz1 == RC_SWIZZLE_ONE) { 162 /* We chose Z, because there are two non-repeating 163 * swizzle combinations of the form .Z. There are 164 * only one combination each for .X. and .Y. */ 165 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); 166 continue; 167 } 168 169 if (swz2 == RC_SWIZZLE_UNUSED) { 170 /* We choose Y, because there are two non-repeating 171 * swizzle combinations of the form ..Y */ 172 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); 173 continue; 174 } 175 176 switch (swz0) { 177 /* X.. */ 178 case RC_SWIZZLE_X: 179 /* Legal swizzles that start with X: XYZ, XXX */ 180 switch (swz1) { 181 /* XX. */ 182 case RC_SWIZZLE_X: 183 /* The new swizzle will be: 184 * ZXY (XX. => ZX. => ZXY) */ 185 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); 186 break; 187 /* XY. */ 188 case RC_SWIZZLE_Y: 189 /* The new swizzle is XYZ */ 190 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z); 191 found_swizzle = 1; 192 break; 193 /* XZ. */ 194 case RC_SWIZZLE_Z: 195 /* XZZ */ 196 if (swz2 == RC_SWIZZLE_Z) { 197 /* The new swizzle is XYZ */ 198 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y); 199 found_swizzle = 1; 200 } else { /* XZ[^Z] */ 201 /* The new swizzle will be: 202 * YZX (XZ. => YZ. => YZX) */ 203 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y); 204 } 205 break; 206 /* XW. Should have already been handled. */ 207 case RC_SWIZZLE_W: 208 assert(0); 209 break; 210 } 211 break; 212 /* Y.. */ 213 case RC_SWIZZLE_Y: 214 /* Legal swizzles that start with Y: YYY, YZX */ 215 switch (swz1) { 216 /* YY. */ 217 case RC_SWIZZLE_Y: 218 /* The new swizzle will be: 219 * XYZ (YY. => XY. => XYZ) */ 220 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); 221 break; 222 /* YZ. */ 223 case RC_SWIZZLE_Z: 224 /* The new swizzle is YZX */ 225 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X); 226 found_swizzle = 1; 227 break; 228 /* YX. */ 229 case RC_SWIZZLE_X: 230 /* YXX */ 231 if (swz2 == RC_SWIZZLE_X) { 232 /*The new swizzle is YZX */ 233 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); 234 found_swizzle = 1; 235 } else { /* YX[^X] */ 236 /* The new swizzle will be: 237 * ZXY (YX. => ZX. -> ZXY) */ 238 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z); 239 } 240 break; 241 /* YW. Should have already been handled. */ 242 case RC_SWIZZLE_W: 243 assert(0); 244 break; 245 } 246 break; 247 /* Z.. */ 248 case RC_SWIZZLE_Z: 249 /* Legal swizzles that start with Z: ZZZ, ZXY */ 250 switch (swz1) { 251 /* ZZ. */ 252 case RC_SWIZZLE_Z: 253 /* The new swizzle will be: 254 * WZY (ZZ. => WZ. => WZY) */ 255 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W); 256 break; 257 /* ZX. */ 258 case RC_SWIZZLE_X: 259 /* The new swizzle is ZXY */ 260 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); 261 found_swizzle = 1; 262 break; 263 /* ZY. */ 264 case RC_SWIZZLE_Y: 265 /* ZYY */ 266 if (swz2 == RC_SWIZZLE_Y) { 267 /* The new swizzle is ZXY */ 268 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X); 269 found_swizzle = 1; 270 } else { /* ZY[^Y] */ 271 /* The new swizzle will be: 272 * XYZ (ZY. => XY. => XYZ) */ 273 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); 274 } 275 break; 276 /* ZW. Should have already been handled. */ 277 case RC_SWIZZLE_W: 278 assert(0); 279 break; 280 } 281 break; 282 283 /* W.. */ 284 case RC_SWIZZLE_W: 285 /* Legal swizzles that start with X: WWW, WZY */ 286 switch (swz1) { 287 /* WW. Should have already been handled. */ 288 case RC_SWIZZLE_W: 289 assert(0); 290 break; 291 /* WZ. */ 292 case RC_SWIZZLE_Z: 293 /* The new swizzle will be WZY */ 294 SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y); 295 found_swizzle = 1; 296 break; 297 /* WX. */ 298 case RC_SWIZZLE_X: 299 /* WY. */ 300 case RC_SWIZZLE_Y: 301 /* W[XY]Y */ 302 if (swz2 == RC_SWIZZLE_Y) { 303 /* The new swizzle will be WZY */ 304 SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z); 305 found_swizzle = 1; 306 } else { /* W[XY][^Y] */ 307 /* The new swizzle will be: 308 * ZXY (WX. => XX. => ZX. => ZXY) or 309 * XYZ (WY. => XY. => XYZ) 310 */ 311 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); 312 } 313 break; 314 } 315 break; 316 /* U.. 0.. 1.. H..*/ 317 case RC_SWIZZLE_UNUSED: 318 case RC_SWIZZLE_ZERO: 319 case RC_SWIZZLE_ONE: 320 case RC_SWIZZLE_HALF: 321 SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X); 322 break; 323 } 324 } 325 326 /* Handle the swizzle in the w channel. */ 327 swz3 = GET_SWZ(reg->Swizzle, 3); 328 329 /* We can skip this if the swizzle in channel w is an inline constant. */ 330 if (swz3 <= RC_SWIZZLE_W) { 331 for (chan = 0; chan < 3; chan++) { 332 unsigned old_swz = GET_SWZ(reg->Swizzle, chan); 333 unsigned new_swz = GET_SWZ(new_swizzle, chan); 334 /* If the swizzle in the w channel is the same as the 335 * swizzle in any other channels, we need to rewrite it. 336 * For example: 337 * reg->Swizzle == XWZW 338 * new_swizzle == XYZX 339 * Since the swizzle in the y channel is being 340 * rewritten from W -> Y we need to change the swizzle 341 * in the w channel from W -> Y as well. 342 */ 343 if (old_swz == swz3) { 344 SET_SWZ(new_swizzle, 3, 345 GET_SWZ(new_swizzle, chan)); 346 break; 347 } 348 349 /* The swizzle in channel w will be overwritten by one 350 * of the new swizzles. */ 351 if (new_swz == swz3) { 352 /* Find an unused swizzle */ 353 unsigned i; 354 unsigned used = 0; 355 for (i = 0; i < 3; i++) { 356 used |= 1 << GET_SWZ(new_swizzle, i); 357 } 358 for (i = 0; i < 4; i++) { 359 if (used & (1 << i)) { 360 continue; 361 } 362 SET_SWZ(new_swizzle, 3, i); 363 } 364 } 365 } 366 } 367 368 for (chan = 0; chan < 4; chan++) { 369 unsigned old_swz = GET_SWZ(reg->Swizzle, chan); 370 unsigned new_swz = GET_SWZ(new_swizzle, chan); 371 372 if (old_swz == RC_SWIZZLE_UNUSED) { 373 continue; 374 } 375 376 /* We don't need to change the swizzle in channel w if it is 377 * an inline constant. These are always legal in the w channel. 378 * 379 * Swizzles with a value > RC_SWIZZLE_W are inline constants. 380 */ 381 if (chan == 3 && old_swz > RC_SWIZZLE_W) { 382 continue; 383 } 384 385 assert(new_swz <= RC_SWIZZLE_W); 386 387 switch (old_swz) { 388 case RC_SWIZZLE_ZERO: 389 imms[new_swz] = 0.0f; 390 break; 391 case RC_SWIZZLE_HALF: 392 if (reg->Negate & (1 << chan)) { 393 imms[new_swz] = -0.5f; 394 } else { 395 imms[new_swz] = 0.5f; 396 } 397 break; 398 case RC_SWIZZLE_ONE: 399 if (reg->Negate & (1 << chan)) { 400 imms[new_swz] = -1.0f; 401 } else { 402 imms[new_swz] = 1.0f; 403 } 404 break; 405 default: 406 imms[new_swz] = rc_get_constant_value(c, reg->Index, 407 reg->Swizzle, reg->Negate, chan); 408 } 409 SET_SWZ(reg->Swizzle, chan, new_swz); 410 } 411 reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants, 412 imms); 413 /* We need to set the register file to CONSTANT in case we are 414 * converting a non-constant register with constant swizzles (e.g. 415 * ONE, ZERO, HALF). 416 */ 417 reg->File = RC_FILE_CONSTANT; 418 reg->Negate = 0; 419 return 1; 420 } 421 422 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user) 423 { 424 struct rc_instruction * inst; 425 426 for(inst = c->Program.Instructions.Next; 427 inst != &c->Program.Instructions; 428 inst = inst->Next) { 429 const struct rc_opcode_info * opcode = 430 rc_get_opcode_info(inst->U.I.Opcode); 431 unsigned int src; 432 433 for(src = 0; src < opcode->NumSrcRegs; ++src) { 434 struct rc_src_register *reg = &inst->U.I.SrcReg[src]; 435 if (c->SwizzleCaps->IsNative(inst->U.I.Opcode, *reg)) { 436 continue; 437 } 438 if (!c->is_r500 && 439 c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS && 440 try_rewrite_constant(c, reg)) { 441 continue; 442 } 443 rewrite_source(c, inst, src); 444 } 445 } 446 if (c->Debug & RC_DBG_LOG) 447 rc_constants_print(&c->Program.Constants); 448 } 449